facebook facebook twitter rss

N4 Bing Crawler

Author: n4sss, Published: 03-09-2014
# This is just another simple Bing crawler using requests and a little bit of threading
# Usage: bing_scan.py -f <dork list> -o <output log> -t <threads>
#
# By N4sss <Twt: @n4sss>
# p0cl4bs inc. | Janissaries.org

import requests, re
from optparse import OptionParser
from threading import *

# Gate that limits how many bingScan workers run at the same time. It starts
# with a single slot and is rebound by set_sem() to the requested thread count.
screenLock = Semaphore(value=1)

# Start-up banner. It is a raw string because the ASCII art is full of
# backslashes that are not valid escape sequences (\_, \/, "\ "); a plain
# literal triggers a SyntaxWarning on recent Python versions. The resulting
# text is exactly the same.
banner = r"""
_ _ _ _ _
_ _| | | | |__(_)_ _ __ _ __ _ _ __ ___ __ _| |___ _ _
| ' \_ _| | '_ \ | ' \/ _` | / _| '_/ _` \ V V / / -_) '_|
|_||_||_| |_.__/_|_||_\__, | \__|_| \__,_|\_/\_/|_\___|_|
|___/
This is another bing crawler using requests and threading a lit bit
-> @N4sss
"""

def save_buf(fileName, content):
    """Append ``content`` to ``fileName`` as one UTF-8 encoded, CRLF-terminated line.

    Args:
        fileName: Path of the log file; it is created if it does not exist.
        content: Text to append (a ``str``).

    Raises:
        OSError: If the file cannot be opened or written.
    """
    line = (content + "\r\n").encode("utf-8")
    # The context manager closes the handle even when the write fails, so a
    # failing call no longer leaks an open file descriptor.
    with open(fileName, "ab") as out:
        out.write(line)

def set_sem(threadNum):
    """Rebind the global ``screenLock`` to a semaphore with ``threadNum`` slots.

    Args:
        threadNum: Maximum number of workers allowed to hold the semaphore
            at once; must be at least 1.

    Raises:
        ValueError: If ``threadNum`` is smaller than 1. A zero-slot semaphore
            can never be acquired, so every worker would block forever.
    """
    global screenLock
    if threadNum < 1:
        raise ValueError("threadNum must be at least 1, got %r" % (threadNum,))
    screenLock = Semaphore(value=threadNum)

def bingScan(dork, outPutLog):
    """Query Bing for ``dork`` and append the matching result links to ``outPutLog``.

    Requests the result pages with ``first`` = 1, 11, ..., 401 and extracts
    every ``<a href="..." h=`` target. A link is saved through ``save_buf``
    when it contains ``http`` and does not match the skip pattern below.
    The work runs while holding one slot of the global ``screenLock``.

    Args:
        dork: Search query to send to Bing.
        outPutLog: Path of the file the accepted links are appended to.

    A network or file error is reported on stdout and ends the scan of this
    dork; it is not raised to the caller.
    """
    link_re = re.compile('<a href="(.*?)" h=')
    skip_re = re.compile('msn|microsoft|php-brasil|facebook|4shared|bing|imasters|phpbrasil|php.net|yahoo|scriptbrasil|under-linux|magentocommerce|forumweb|ehow')

    # `with` gives the slot back on every exit path. Previously the release
    # was skipped whenever an exception occurred, which could leave the other
    # workers waiting on the semaphore forever.
    with screenLock:
        try:
            for first in range(1, 402, 10):
                print('[+] dork: ' + dork + ' page: ' + str(first))
                # Passing the query through `params` URL-encodes it, so dorks
                # containing '&', '#', '?' or '=' no longer corrupt the query
                # string. The timeout keeps a stalled connection from
                # holding the slot indefinitely.
                r = requests.get(
                    'http://www.bing.com/search',
                    params={'q': dork, 'count': 50, 'first': first},
                    timeout=30,
                )
                for link in link_re.findall(r.text):
                    if 'http' in link and not skip_re.search(link):
                        save_buf(outPutLog, link)
        except (requests.RequestException, OSError) as err:
            # Stay best-effort like the original, but say why the dork stopped.
            print('[-] dork: ' + dork + ' aborted: ' + str(err))

def init(dorkList, outPutLog):
    """Start one ``bingScan`` thread for every dork listed in ``dorkList``.

    Args:
        dorkList: Path of a text file with one dork per line.
        outPutLog: Path of the output log handed to every worker.

    Raises:
        OSError: If ``dorkList`` cannot be opened.

    The threads are started but not joined here.
    """
    # The context manager closes the dork file once all workers are started.
    with open(dorkList) as dorks:
        for line in dorks:
            dork = line.strip()
            if not dork:
                # A blank line would otherwise spawn a worker for an empty query.
                continue
            Thread(target=bingScan, args=(dork, outPutLog)).start()

def main():
    """Parse the command line, print the banner and launch the crawl.

    Options:
        -f / --file    file containing the dorks (required)
        -o / --output  log file the results are written to (required)
        -t / --thread  number of dorks processed at the same time (default 1)

    Prints the usage text and exits with status 0 when -f or -o is missing.
    Exits through ``parser.error`` when -t is smaller than 1.
    """
    # Local import: sys.exit is always available, while the bare exit() helper
    # is injected by the `site` module and may be missing (e.g. python -S).
    import sys

    parser = OptionParser("Usage: bing_crawler.py -f <dork list> -o <output log> -t <threads>\nbing_scan.py --help to full help")
    parser.add_option('-f', '--file', dest='dorkList', type='string',
                      help='Dork file')
    parser.add_option('-o', '--output', dest='outPutLog', type='string',
                      help='Output log to write results')
    parser.add_option('-t', '--thread', dest='thread', default=1, type='int',
                      help='Total dork threads per time | Default: 1')
    options, _args = parser.parse_args()

    print(banner)

    if options.dorkList is None or options.outPutLog is None:
        print(parser.usage)
        sys.exit(0)

    if options.thread < 1:
        # A semaphore with zero slots would leave every worker blocked forever.
        parser.error('-t/--thread must be at least 1')

    set_sem(options.thread)
    init(options.dorkList, options.outPutLog)

# Start the crawler only when this file is executed as a script.
if __name__ == '__main__':
    main()

Like us on Facebook :