10.04 Парсим wordstat без капчи
# NOTE(review): this listing is collapsed onto a single physical line.
# Because that line begins with "#", Python reads all of it as one comment;
# the statements have to be split back onto separate, indented lines before
# anything here can run.
#
# NOTE(review): the text is also incomplete. Between "if (int(count)" and
# "0: self.parse(queue.get_nowait())" part of the listing is missing --
# possibly swallowed as an HTML tag between a "<" and a ">"; confirm against
# the original post. `self.clear(...)` and `w.go()` are called, but no
# `clear` or `go` definition is visible, and the `try:` inside `parse` has
# no visible `except`/`finally`; presumably all of these were in the
# missing span. Do not restyle or "fix" this code without recovering it.
#
# What the visible text shows:
#   * Python 2 names are used: the `Queue` module, `xrange`,
#     `urllib.quote_plus`.
#   * Module level: one `threading.RLock()` bound to `lock` and one
#     `Queue.Queue()` bound to `queue`.
#   * WordStat.__init__: sets `min` = 30, `max` = 3000, an empty `used`
#     list, stores the result of `magic_cookie()` in `cook`, and opens
#     "parsing.txt" in "w" mode without keeping or closing the handle.
#   * WordStat.write(data, name, mode='a+'): between `lock.acquire()` and
#     `lock.release()` it opens "<name>.txt" in `mode`, writes `data`,
#     flushes and closes. There is no try/finally, so an exception in
#     between would skip `lock.release()`.
#   * WordStat.parse(key): URL-quotes the UTF-8 encoded key, then inside
#     `for i in xrange(3)` / `try:` requests the advq.yandex.ru "words"
#     page with the stored cookies and timeout=150, walks the rows matched
#     by the `tr[@class="tlist"]` XPath, and takes `items[1]` (passed
#     through `self.clear`) as the phrase and `items[-1]` as its count.
#     A phrase not yet in `used` whose int(count) > `self.min` is put on
#     `queue` and appended to `used`. `page` stays 1 in the visible text.
#   * WordStat.magic_cookie(): requests http://kiks.yandex.ru/su/ with
#     timeout=150 and returns `g.response.cookies`.
#   * Script tail: builds a `WordStat` and calls its `go()`.
#coding:utf-8 from grab import Grab import threading,Queue,re,time,random from urllib import quote_plus lock = threading.RLock() queue = Queue.Queue() class WordStat(object): def __init__(self): self.min = 30 self.max = 3000 self.used = [] self.cook = self.magic_cookie() open("parsing.txt","w") def write(self,data,name,mode='a+'): lock.acquire() put = open("%s.txt"%(name),mode) put.write(data) put.flush() put.close() lock.release() def parse(self,key): page = 1 query_safe = quote_plus(key.encode('utf-8')) g = Grab() for i in xrange(3): try: url = 'http://advq.yandex.ru/?cmd=words&page=%d&text=%s&geo=&text_geo=' % (page, query_safe) g.setup(url=url,cookies=self.cook,timeout=150,) g.request() for tr in g.xpath('/html/body/form/table[2]/tbody/tr/td[4]/table/tbody/tr[3]/td[1]/table/tbody//tr[@class="tlist"]'): items = [x.strip().encode('utf-8') for x in tr.xpath('td//text()')] kkey = self.clear(items[1]) count = items[-1] if (kkey not in self.used) and (int(count)>self.min): queue.put(kkey) self.used.append(kkey) if (int(count)0: self.parse(queue.get_nowait()) def magic_cookie(self): g = Grab() g.setup(url='http://kiks.yandex.ru/su/',timeout=150) g.request() return g.response.cookies w = WordStat() w.go()
-
http://www.seocoder.org/ SeoCoder
-
http://klipner.ru/ RushteR
-
http://seo13.ru SkYmAn
-
http://klipner.ru/ RushteR
-
http://seo13.ru SkYmAn