#!/usr/bin/env python
# PYTHON_ARGCOMPLETE_OK
"""Count how often each word occurs in the file names below some directories.

Settings (minimum word length, minimum count, stop words) are read from
``~/.config/filenamestat.ini``; the file is created with defaults on first use.
"""
import argparse
import collections
import configparser
import os
import pathlib
import re

try:
    import argcomplete
except ImportError:  # Tab completion is a convenience; the tool works without it.
    argcomplete = None

# Fallback threshold used when neither the command line nor the config sets one.
mincount = 10

# Values written to a fresh config file and used for keys a file leaves out.
_DEFAULT_SETTINGS = {
    "minlen": "3",
    "mincount": str(mincount),
    "stopwords": "and,the,for",
}

# Any run of non-word characters (whitespace included) or underscores
# separates two words inside a file name.
_WORD_SEPARATORS = re.compile(r"[\W_]+")


def getconf(configfile=None):
    """
    Load the settings, creating the config file with defaults if missing.

    :param configfile: str or path-like, location of the ini file. Defaults to
        ``~/.config/filenamestat.ini``.
    :returns: configparser.ConfigParser whose ``DEFAULT`` section always
        contains ``minlen``, ``mincount`` and ``stopwords``.
    """
    if configfile is None:
        configfile = os.path.join(
            pathlib.Path.home(), ".config", "filenamestat.ini"
        )

    config = configparser.ConfigParser()
    # Seed the defaults first so that a config file which omits a key still
    # yields a complete DEFAULT section after it has been read.
    config["DEFAULT"] = dict(_DEFAULT_SETTINGS)

    if os.path.isfile(configfile):
        config.read(configfile)
        return config

    # On a fresh account the parent directory may not exist yet.
    os.makedirs(os.path.dirname(configfile) or ".", exist_ok=True)
    with open(configfile, "w") as fh:
        config.write(fh)
    return config


def filenamestats(paths):
    """
    Walk the given directories and count the words used in file names.

    The extension is dropped, the name is lower-cased and then split on runs
    of non-alphanumeric characters and underscores. Empty fragments (as
    produced by names such as ``.bashrc``) are not counted.

    :param paths: iterable of directory paths; a single str or path-like
        object is accepted as well.
    :returns: dict where the keys are found words and the values are the
        occurrences.
    """
    if isinstance(paths, (str, os.PathLike)):
        # Iterating a bare string would walk it character by character.
        paths = [paths]

    counts = collections.Counter()
    for path in paths:
        for _dirpath, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                stem = os.path.splitext(filename.strip())[0].lower()
                # split() without arguments discards empty fragments.
                counts.update(_WORD_SEPARATORS.sub(" ", stem).split())
    return dict(counts)


def dumpstats(words, stopwords=None, minlen=3, mincount=mincount):
    """
    Print ``word: count`` lines, most frequent word first.

    :param words: dict mapping word to number of occurrences.
    :param stopwords: iterable of words that are never printed.
    :param minlen: int, words shorter than this are skipped.
    :param mincount: int, words occurring fewer times than this are skipped.
    """
    ignored = frozenset(stopwords or ())
    ranked = sorted(words.items(), key=lambda item: item[1], reverse=True)
    for word, count in ranked:
        if word in ignored or len(word) < minlen or count < mincount:
            continue
        print(f"{word}: {count}")


def main(argv=None):
    """
    Command line entry point.

    :param argv: list of str, arguments to parse; ``None`` means
        ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Filename stats")
    parser.add_argument(
        "paths", type=str, nargs="*", default=["."], help="Path to search"
    )
    parser.add_argument(
        "--mincount",
        "-m",
        type=int,
        # None means "not given", so the configured value can take effect
        # and an explicit 0 is still honoured.
        default=None,
        help=(
            "Show only results with that many matches "
            f"(default: value from the config file, initially {mincount})"
        ),
    )
    parser.add_argument(
        "--stopwords",
        "-s",
        nargs="*",
        default=[],
        help="Ignore those words",
    )
    # The completion hook has to run before the arguments are parsed.
    if argcomplete is not None:
        argcomplete.autocomplete(parser)
    args = parser.parse_args(argv)

    settings = getconf()["DEFAULT"]
    # File name words are lower-cased, so normalise the stop words likewise
    # and tolerate spaces around the commas in the config value.
    candidates = [*args.stopwords, *settings["stopwords"].split(",")]
    stopwords = {word.strip().lower() for word in candidates if word.strip()}

    if args.mincount is not None:
        min_count = args.mincount
    else:
        min_count = int(settings["mincount"])

    dumpstats(
        filenamestats(args.paths),
        stopwords=stopwords,
        minlen=int(settings["minlen"]),
        mincount=min_count,
    )


if __name__ == "__main__":
    main()