# encoding=utf-8
"""Redis-backed Bloom filter for de-duplicating strings (e.g. crawled URLs)."""
import redis
from hashlib import md5


class SimpleHash(object):
    """Seeded multiplicative string hash reduced to an offset in a bit array."""

    def __init__(self, cap, seed):
        """
        :param cap: size of the bit array; results are masked with ``cap - 1``
        :param seed: multiplier that makes this hash differ from its siblings
        """
        self.cap = cap
        self.seed = seed

    def hash(self, value):
        """Return the bit offset for ``value``, a text string."""
        ret = 0
        for ch in value:
            ret += self.seed * ret + ord(ch)
        # The mask only acts as a modulo when cap is a power of two.
        return (self.cap - 1) & ret


class BloomFilter(object):
    """Bloom filter whose bit arrays are stored in Redis string keys."""

    def __init__(self, host='localhost', port=6379, db=0, blockNum=1, key='bloomfilter'):
        """
        :param host: the host of Redis
        :param port: the port of Redis
        :param db: which db in Redis
        :param blockNum: one blockNum for about 90,000,000; if you have more
            strings for filtering, increase it. The block is chosen from the
            first two hex digits of the MD5 digest (0-255), so values above
            256 add no further blocks.
        :param key: the key's name in Redis; the block index is appended to it
        """
        self.server = redis.Redis(host=host, port=port, db=db)
        # A Redis string holds at most 512 MB; 1 << 31 bits uses 256 MB of it.
        self.bit_size = 1 << 31
        self.seeds = [5, 7, 11, 13, 31, 37, 61]
        self.key = key
        self.blockNum = blockNum
        self.hashfunc = [SimpleHash(self.bit_size, seed) for seed in self.seeds]

    def _locate(self, str_input):
        """Return ``(name, offsets)``: the Redis key and bit offsets for ``str_input``."""
        if not isinstance(str_input, (bytes, bytearray)):
            # hashlib only accepts bytes on Python 3. On Python 2 a byte string
            # passes through untouched, so previously stored digests still match.
            str_input = str_input.encode('utf-8')
        digest = md5(str_input).hexdigest()
        # The first two hex digits of the digest select the block.
        name = self.key + str(int(digest[0:2], 16) % self.blockNum)
        return name, [f.hash(digest) for f in self.hashfunc]

    def isContains(self, str_input):
        """Return True if ``str_input`` may have been inserted, else False.

        False is definitive; True can be a false positive, as with any Bloom
        filter. Empty input is never reported as present.
        """
        if not str_input:
            return False
        name, offsets = self._locate(str_input)
        # all() stops at the first unset bit, skipping the remaining round trips.
        return all(self.server.getbit(name, loc) for loc in offsets)

    def insert(self, str_input):
        """Record ``str_input`` by setting each of its bits in Redis."""
        if not str_input:
            # isContains never reports empty input, so storing it would only
            # set bits that raise the false-positive rate.
            return
        name, offsets = self._locate(str_input)
        for loc in offsets:
            self.server.setbit(name, loc, 1)


if __name__ == '__main__':
    # The first run prints "not exists!"; later runs print "exists!".
    bf = BloomFilter()
    if bf.isContains('http://www.baidu.com'):  # check whether the string was seen before
        print('exists!')
    else:
        print('not exists!')
        bf.insert('http://www.baidu.com')