"""Generate URL rewrite rules for the ``.aspx`` pages found by crawling a site.

Run as a script (optionally with the start URL as the first command line
argument) to crawl the site, write ``<server>-rewriterules.txt`` into the
current directory and print the rules together with a readable explanation.
"""

import functools
import os
import sys

try:
    import cgi  # no longer used here; kept so existing module-level names survive
except ImportError:  # the cgi module does not exist on newer Python 3 releases
    cgi = None

try:
    import urlparse  # Python 2
except ImportError:
    import urllib.parse as urlparse  # Python 3

try:
    from crawlsite import crawlsite
    _strCrawlsiteImportError = ""
except ImportError as importError:
    # Only the constructor needs the crawler; deferring the failure lets the
    # rule-building methods be used on an already collected URL dictionary.
    crawlsite = None
    _strCrawlsiteImportError = str(importError)

try:
    _ReadInput = raw_input  # Python 2
except NameError:
    _ReadInput = input  # Python 3


class createRewriteRules:
    """Crawl a site and derive one rewrite rule per path / query-key combination.

    After construction (or after calling ``WriteRewriteRules`` directly) the
    generated text is available through ``GetRewriteRules``.
    """

    # Newline-terminated rewrite rules produced by the last WriteRewriteRules call.
    strRewriteRulesEntries = ""
    # Newline-terminated human readable explanations, one per rule.
    strExplainRewriteRulesEntries = ""

    def __init__(self, urltocrawl=""):
        """Crawl ``urltocrawl`` and write the rewrite rules file.

        When ``urltocrawl`` is blank the URL is taken from ``sys.argv[1]`` or,
        if no argument was given, read interactively.

        Raises:
            ImportError: if the ``crawlsite`` module could not be imported.
        """
        if crawlsite is None:
            raise ImportError(
                "the crawlsite module is required to crawl a site: "
                + _strCrawlsiteImportError
            )

        strUrl = urltocrawl.strip()
        if not strUrl:
            if len(sys.argv) < 2:
                strUrl = _ReadInput("Supply url to crawl: ")
            else:
                strUrl = sys.argv[1]
        self.url = strUrl

        crawlsiteObj = crawlsite(self.url)
        dUniqueUrls = crawlsiteObj.crawl()
        self.WriteRewriteRules(dUniqueUrls)

    def BuildUniquePaths(self, dUniqueUrls):
        """Group the distinct sets of query-string keys by lower-cased path.

        Args:
            dUniqueUrls: mapping whose values are dicts providing at least the
                ``'path'`` and ``'query'`` strings of one URL.

        Returns:
            dict mapping each lower-cased path to a list of sorted key lists,
            one per distinct combination of query keys seen for that path.
            Keys are lower-cased; parameters with blank values are ignored
            (``parse_qs`` default).

        NOTE(review): the source file had lost its indentation, so the nesting
        below is reconstructed. A path first seen without a query gets an
        empty list of groups, while a query-less URL seen for an already known
        path adds an empty key group - confirm this matches the intent.
        """
        dUniquePaths = {}
        for u in dUniqueUrls:
            dUrlParts = dUniqueUrls[u]
            path = dUrlParts['path'].lower()
            query = dUrlParts['query'].lower()
            lThisQuery = sorted(urlparse.parse_qs(query))

            if path not in dUniquePaths:
                dUniquePaths[path] = [lThisQuery] if lThisQuery else []
            elif lThisQuery not in dUniquePaths[path]:
                dUniquePaths[path].append(lThisQuery)
        return dUniquePaths

    def ListSizeCompare(self, lx, ly):
        """cmp-style comparison ordering longer sequences before shorter ones.

        Returns -1 when ``lx`` is longer than ``ly``, 0 when both have the
        same length and 1 otherwise.
        """
        lxLength = len(lx)
        lyLength = len(ly)
        if lxLength > lyLength:
            return -1
        if lxLength == lyLength:
            return 0
        return 1

    def _BuildKeyedRule(self, strCurrentFileName, strShortFileName, lKeys):
        """Return ``(rewrite rule, explanation)`` for one group of query keys."""
        if not lKeys:
            # Query-less variant of a path that has key groups: whatever
            # follows the short name is handed over as the query string.
            strRewriteRule = (
                "RewriteRule " + strShortFileName + r"(\/{0,1})(.*?) "
                + strCurrentFileName + "?$2 [I,L]"
            )
            strExplainRule = strCurrentFileName + " should become " + strShortFileName
            return strRewriteRule, strExplainRule

        if len(lKeys) == 1:
            # A single key is implied by position, so it is not part of the
            # friendly URL.
            strRewriteRule = (
                "RewriteRule " + strShortFileName + r"/(.*?)(\/{0,1}) "
                + strCurrentFileName + "?" + lKeys[0] + "=$1 [I,L]"
            )
            strExplainRule = (
                strCurrentFileName + "?" + lKeys[0] + "=%VAR% should become "
                + strShortFileName + "/%VAR%"
            )
            return strRewriteRule, strExplainRule

        # Several keys: /short/key1/<value1>/key2/<value2>, where the n-th
        # capture group feeds the n-th query parameter.
        strPattern = "/".join(key + "/(.*?)" for key in lKeys) + r"(\/{0,1})"
        strTarget = "&".join(
            key + "=$" + str(iKeyCount)
            for iKeyCount, key in enumerate(lKeys, 1)
        )
        strRewriteRule = (
            "RewriteRule " + strShortFileName + "/" + strPattern
            + " " + strCurrentFileName + "?" + strTarget + " [I,L]"
        )
        strExplainRule = (
            strCurrentFileName + "?" + "&".join(key + "=%VAR%" for key in lKeys)
            + " should become " + strShortFileName + "/"
            + "/".join(key + "/%VAR%" for key in lKeys)
        )
        return strRewriteRule, strExplainRule

    def WriteRewriteRules(self, dUniqueUrls):
        """Build the rules for ``dUniqueUrls`` and write them to disk.

        The rules are stored on the instance (see ``GetRewriteRules``) and
        written to ``<server>-rewriterules.txt`` in the current directory,
        where ``<server>`` is the lower-cased network location of ``self.url``.
        Longer paths and larger key groups are emitted first.

        Args:
            dUniqueUrls: URL mapping as accepted by ``BuildUniquePaths``.
        """
        urlToCheckParts = urlparse.urlsplit(self.url)
        server = urlToCheckParts[1].lower()
        strRewriteFileName = server + "-rewriterules.txt"

        dUniquePaths = self.BuildUniquePaths(dUniqueUrls)
        # cmp_to_key keeps ListSizeCompare as the single definition of the
        # ordering and works on both Python 2.7 and Python 3.
        sizeKey = functools.cmp_to_key(self.ListSizeCompare)

        lRewriteRules = []
        lExplainRules = []
        for path in sorted(dUniquePaths, key=sizeKey):
            lKeyGroups = dUniquePaths[path]
            lKeyGroups.sort(key=sizeKey)  # groups with the most keys first
            strCurrentFileName = str(path).lower()
            strShortFileName = strCurrentFileName.replace(".aspx", "")

            if not lKeyGroups:
                # e.g. RewriteRule /kf900/range(\/{0,1})(.*?) /kf900/range.aspx$2 [I,L]
                lRewriteRules.append(
                    "RewriteRule " + strShortFileName + r"(\/{0,1})(.*?) "
                    + strCurrentFileName + "$2 [I,L]"
                )
                lExplainRules.append(
                    strCurrentFileName + " should become " + strShortFileName
                )
                continue

            for lKeys in lKeyGroups:
                lKeys.sort()
                strRewriteRule, strExplainRule = self._BuildKeyedRule(
                    strCurrentFileName, strShortFileName, lKeys
                )
                lRewriteRules.append(strRewriteRule)
                lExplainRules.append(strExplainRule)

        self.strRewriteRulesEntries = "".join(rule + "\n" for rule in lRewriteRules)
        self.strExplainRewriteRulesEntries = "".join(
            rule + "\n" for rule in lExplainRules
        )

        # Open the file only once the rules exist, and let the context
        # manager close it, so a failure above cannot leak the handle or
        # leave a truncated file behind.
        with open(strRewriteFileName, "w") as rewriteRulesFile:
            rewriteRulesFile.write(self.strRewriteRulesEntries)
        return

    def GetRewriteRules(self):
        """Return ``[rewrite rules text, explanation text]`` of the last run."""
        return [self.strRewriteRulesEntries, self.strExplainRewriteRulesEntries]


if __name__ == '__main__':
    createRewriteRulesObj = createRewriteRules()
    strRewriteRules = createRewriteRulesObj.GetRewriteRules()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    # The heading texts are kept exactly as before in case the output is parsed.
    print("\n\nRewrite Rules")
    print(strRewriteRules[0])
    print("\nExplantion")
    print(strRewriteRules[1])