03.28 Парсер Google через AJAX Search API
Все файлы должны быть в utf-8.
# coding:utf-8
# author: Rushter
# site: http://klipner.ru
"""Collect result URLs from the Google AJAX Search API.

Reads search queries from ``query.txt`` and User-Agent strings from
``ua.txt`` (one entry per line, UTF-8).  For every query the first three
result pages (offsets 0, 4 and 8) are requested and the ``unescapedUrl`` of
each result is appended to ``parsed_query.txt``, one URL per line.

A failed request abandons the remaining pages of the current query.  The
whole run stops once more than ``MAX_ERRORS`` requests have failed.

Written for Python 2 (``urllib2`` / ``urllib.quote``).
"""
import urllib2, random, re, time, json, urllib
import sys

API_URL = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&start={0}&q={1}"
MAX_ERRORS = 100      # give up after this many failed requests
RESULTS_PER_PAGE = 4  # the API is paged in steps of four results

# User-Agent pool.  Line endings are stripped because a header value that
# contains "\n" corrupts the HTTP request; blank lines are dropped.
with open("ua.txt") as ua_file:
    ff = [line.strip() for line in ua_file if line.strip()]

# Search queries, one per line; blank lines would only produce empty searches.
with open("query.txt") as query_file:
    ques = [line.strip("\r\n") for line in query_file if line.strip()]

err = 0
out = open("parsed_query.txt", "a")
try:
    for query in ques:
        if err > MAX_ERRORS:
            # Too many failures in this run: stop instead of hammering the API.
            break

        for j in range(0, 12, RESULTS_PER_PAGE):
            # Rotate the User-Agent per request; fall back to urllib2's
            # default header when ua.txt is empty.
            header = {"User-Agent": random.choice(ff)} if ff else {}
            lnk = API_URL.format(j, urllib.quote(query))

            try:
                page = urllib2.urlopen(urllib2.Request(url=lnk, headers=header))
                try:
                    # "responseData" is null when the API rejects the request;
                    # the resulting TypeError is handled below as a failure.
                    pp = json.load(page)["responseData"]["results"]
                finally:
                    page.close()
                urls = [item["unescapedUrl"] for item in pp[:RESULTS_PER_PAGE]]
            except Exception as exc:
                # Best-effort scraping: count the failure, report it and skip
                # the remaining pages of this query.  KeyboardInterrupt and
                # SystemExit are deliberately not caught.
                err = err + 1
                sys.stderr.write("request failed for %r: %s\n" % (query, exc))
                break

            for url in urls:
                # json yields unicode; encode explicitly so non-ASCII URLs do
                # not raise UnicodeEncodeError on write.
                if isinstance(url, unicode):
                    url = url.encode("utf-8")
                out.write(url + "\n")
            # Flush after every page so partial progress survives an abort.
            out.flush()
finally:
    out.close()
-
ppbi
-
http://klipner.ru rushter
-
Cabal
-
Cabal
-
Cabal
-
ppbi
-
Мария Хуана
-
dr.canibal
-
http://klipner.ru rushter
-
dr.canibal
-
http://klipner.ru rushter
-
http://djangoproject.su/ Djangoman
-
http://klipner.ru rushter
-
Ug
-
http://klipner.ru rushter
-
Ug
-
http://klipner.ru rushter
-
Ug
-
http://klipner.ru rushter