Count lines of code in directory using Python

Question:

I have a project whose lines of code I want to count. Is it possible to count all the lines of code in the file directory containing the project by using Python?

Asked By: Daniel

||

Answers:

from os import listdir
from os.path import isfile, join

def countLinesInPath(path,directory):
    """Return the number of lines in the file ``path`` inside ``directory``.

    Args:
        path: File name (or path relative to ``directory``).
        directory: Directory that ``path`` is joined onto.

    Returns:
        The number of lines read from the file, decoded as UTF-8.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # The context manager closes the handle as soon as counting finishes;
    # opening the file inline in the ``for`` statement left it unclosed.
    with open(join(directory, path), encoding="utf8") as source:
        return sum(1 for _ in source)

def countLines(paths,directory):
    """Return the combined line count of every file named in ``paths``.

    Each name is resolved against ``directory`` by ``countLinesInPath``;
    an empty ``paths`` gives 0.
    """
    return sum(countLinesInPath(name, directory) for name in paths)

def getPaths(directory):
    """Return the names of the regular files directly inside ``directory``.

    Sub-directories are left out and are not descended into. The names are
    returned bare (not joined with ``directory``), in ``listdir`` order.
    """
    regular_files = []
    for name in listdir(directory):
        if isfile(join(directory, name)):
            regular_files.append(name)
    return regular_files

def countIn(directory):
    """Return the total number of lines in the files directly in ``directory``.

    Only the top level is examined; sub-directories are not visited.
    """
    file_names = getPaths(directory)
    return countLines(file_names, directory)

To count all the lines of code in the files in a directory, call the "countIn" function, passing the directory as a parameter.

Answered By: Daniel

Here’s a function I wrote to count all lines of code in a python package and print an informative output. It will count all lines in all .py files.

import os

def countlines(start, lines=0, header=True, begin_start=None):
    """Recursively count the lines of every ``.py`` file under ``start``.

    One table row is printed per file: the lines it added, the running
    total, and the file's path relative to the top-level directory.

    Args:
        start: Directory to scan.
        lines: Running total carried in from the caller (used by recursion).
        header: Print the table header first; recursive calls pass False.
        begin_start: Directory that printed paths are made relative to.
            ``None`` (the default for a top-level call) means ``start``.

    Returns:
        ``lines`` plus the number of lines found under ``start``.
    """
    if header:
        print('{:>10} |{:>10} | {:<20}'.format('ADDED', 'TOTAL', 'FILE'))
        print('{:->11}|{:->11}|{:->20}'.format('', '', ''))

    # Keep reporting relative to the directory of the *top-level* call.
    # Passing the immediate parent down instead made files two or more
    # levels deep lose their leading path components.
    root = start if begin_start is None else begin_start

    subdirs = []
    for name in os.listdir(start):
        thing = os.path.join(start, name)
        if os.path.isdir(thing):
            # Recurse only after this directory's own files are reported.
            subdirs.append(thing)
        elif os.path.isfile(thing) and thing.endswith('.py'):
            with open(thing, 'r') as f:
                newlines = sum(1 for _ in f)
            lines += newlines

            # Strip only the leading root; str.replace would also remove
            # later occurrences of the same text inside the path.
            if thing.startswith(root):
                reldir_of_thing = '.' + thing[len(root):]
            else:
                reldir_of_thing = '.' + thing

            print('{:>10} |{:>10} | {:<20}'.format(
                    newlines, lines, reldir_of_thing))

    for subdir in subdirs:
        lines = countlines(subdir, lines, header=False, begin_start=root)

    return lines

To use it, just pass the directory you’d like to start in. For example, to count the lines of code in some package foo:

countlines(r'...\foo')

Which would output something like:

     ADDED |     TOTAL | FILE               
-----------|-----------|--------------------
        5  |        5  | .\__init__.py       
       539 |       578 | .\bar.py          
       558 |      1136 | .\baz\qux.py         
Answered By: Bryce93

This is derived from Daniel’s answer (though refactored enough that this won’t be obvious). That one doesn’t recurse through subdirectories, which is the behavior I wanted.

from os import listdir
from os.path import isfile, isdir, join

def item_line_count(path):
    """Return the number of lines at ``path``.

    A directory is counted recursively via ``dir_line_count``, and a regular
    file is counted by its newline-delimited lines. Anything else, including
    a path that does not exist, counts as 0.
    """
    if isdir(path):
        return dir_line_count(path)
    if isfile(path):
        # Binary mode avoids decoding errors on non-text files. The context
        # manager closes the handle, and iterating avoids loading the whole
        # file into memory just to count it.
        with open(path, 'rb') as handle:
            return sum(1 for _ in handle)
    return 0

def dir_line_count(dir):
    """Return the total line count of everything inside directory ``dir``.

    Every entry is handed to ``item_line_count``, which recurses into
    sub-directories.
    """
    total = 0
    for item in listdir(dir):
        total += item_line_count(join(dir, item))
    return total
Answered By: Daniel Weaver

pygount will display all the files in the folder, each with a count of code lines (excluding documentation)

https://pypi.org/project/pygount/

pip install pygount

To list the results for a given directory, run:

pygount ~/path_to_directory
Answered By: Vilmar Rafael

This has a slight air of homework assignment 🙂 — nonetheless, it’s a worthwhile exercise, and Bryce93’s formatting is nice. I think many would be unlikely to use Python for this given that it can be done quickly with a couple of shell commands, for example:

cat $(find . -name "*.py") | grep -E -v '^\s*$|^\s*#' | wc -l

Note that none of these solutions accounts for multiline (''') comments.

Answered By: JP Lodine

As an addition to the pygount answer, they just added the option --format=summary to get the total number of lines in different file types in a directory.

pygount --format=summary ./your-directory

could output something like

  Language     Code    %     Comment    %
-------------  ----  ------  -------  ------
XML            1668   48.56       10    0.99
Python          746   21.72      150   14.90
TeX             725   21.11       57    5.66
HTML            191    5.56        0    0.00
markdown         58    1.69        0    0.00
JSON             37    1.08        0    0.00
INI              10    0.29        0    0.00
Text              0    0.00      790   78.45
__duplicate__     0    0.00        0    0.00
-------------  ----  ------  -------  ------
Sum total      3435             1007
Answered By: user7642017

Based on Bryce93’s answer, with code_only option to exclude comments, docstrings, and empty lines from line count:

import os

def countlines(rootdir, total_lines=0, header=True, begin_start=None,
               code_only=True):
    """Recursively count the lines of every ``.py`` file under ``rootdir``.

    One table row is printed per file: the lines it added, the running
    total, and the file's path relative to the top-level directory.

    Args:
        rootdir: Directory to scan.
        total_lines: Running total carried in from the caller (recursion).
        header: Print the table header first; recursive calls pass False.
        begin_start: Directory that printed paths are made relative to.
            ``None`` (the default for a top-level call) means ``rootdir``.
        code_only: When True, leave out blank lines, ``#`` comment lines and
            lines belonging to a ``\"\"\"`` block. The check is a line-based
            heuristic, not a parse, so ``'''`` strings are not recognised.

    Returns:
        ``total_lines`` plus the number of lines counted under ``rootdir``.
    """
    def _get_new_lines(source):
        """Return how many lines of ``source`` look like code."""
        code_lines = 0
        in_docstring = False
        for line in source:
            # strip() removes the trailing newline (so blank lines really
            # compare empty) and tab as well as space indentation.
            stripped = line.strip()
            if in_docstring:
                if '"""' in stripped:  # docstring end
                    in_docstring = False
                continue
            if not stripped or stripped.startswith('#'):
                continue
            if '"""' in stripped:  # docstring begin
                # Two markers on one line open and close the docstring there.
                # Any other count opens a block that runs to the next marker,
                # or to end of file if it is never closed.
                if stripped.count('"""') != 2:
                    in_docstring = True
                continue
            code_lines += 1
        return code_lines

    if header:
        print('{:>10} |{:>10} | {:<20}'.format('ADDED', 'TOTAL', 'FILE'))
        print('{:->11}|{:->11}|{:->20}'.format('', '', ''))

    # Keep reporting relative to the directory of the *top-level* call.
    # Passing the immediate parent down instead made files two or more
    # levels deep lose their leading path components.
    base = rootdir if begin_start is None else begin_start

    subdirs = []
    for name in os.listdir(rootdir):
        file = os.path.join(rootdir, name)
        if os.path.isdir(file):
            # Recurse only after this directory's own files are reported.
            subdirs.append(file)
        elif os.path.isfile(file) and file.endswith('.py'):
            with open(file, 'r') as f:
                source = f.readlines()

            if code_only:
                new_lines = _get_new_lines(source)
            else:
                new_lines = len(source)
            total_lines += new_lines

            # Strip only the leading base; str.replace would also remove
            # later occurrences of the same text inside the path.
            if file.startswith(base):
                reldir_of_file = '.' + file[len(base):]
            else:
                reldir_of_file = '.' + file

            print('{:>10} |{:>10} | {:<20}'.format(
                    new_lines, total_lines, reldir_of_file))

    for subdir in subdirs:
        total_lines = countlines(subdir, total_lines, header=False,
                                 begin_start=base, code_only=code_only)
    return total_lines
Answered By: OverLordGoldDragon

If you want to count how many lines are in your project, create a script inside of your project folder and paste the following into it:

import os

# --- Configuration -------------------------------------------------------
# Root directory handed to _walk(); replace the placeholder before running.
directory = "[project_directory]"
directory_depth = 100 # How deep you would like to go (levels of nesting below `directory`)
extensions_to_consider = [".py", ".css"]  # Change to ["all"] to include all extensions
# Any entry whose path ends with one of these strings is skipped by _walk().
exclude_filenames = ["venv", ".idea", "__pycache__", "cache"]
# When False, the paths that could not be read are printed after counting;
# when True only the number of failures is shown.
skip_file_error_list = True

# Resolved path of this script itself; the counting loop compares each
# candidate path against it so the script does not count its own lines.
this_file_dir = os.path.realpath(__file__)

print("Path to ignore:", this_file_dir)
print("=====================================")
def _walk(path, depth):
    """Yield the paths of entries below ``path``, at most ``depth`` levels deep.

    Both files and directories are yielded. An entry whose path ends with
    any string in ``exclude_filenames`` is reported on stdout and skipped,
    which for a directory also skips everything beneath it.
    """
    remaining = depth - 1
    with os.scandir(path) as listing:
        for item in listing:

            if any(item.path.endswith(suffix) for suffix in exclude_filenames):
                print("Skipping entry", item.path)
                continue

            yield item.path
            # Only descend while there is depth budget left.
            if item.is_dir() and remaining > 0:
                yield from _walk(item.path, remaining)

# Walk the tree once up front so progress can be reported as a percentage.
print("Caching entries")
files = list(_walk(directory, directory_depth))
print("=====================================")

print("Counting Lines")
file_err_list = []
line_count = 0
len_files = len(files)
# "all" switches extension filtering off; otherwise a path must end with one
# of the configured suffixes (str.endswith accepts a tuple of candidates).
count_every_file = "all" in extensions_to_consider
wanted_suffixes = tuple(extensions_to_consider)
for i, file_dir in enumerate(files):

    # _walk() yields paths built from `directory`, which may be relative or
    # contain symlinks, so resolve before comparing with the script's realpath.
    if os.path.realpath(file_dir) == this_file_dir:
        print("=[Rejected file directory", file_dir, "]=")
        continue

    if not os.path.isfile(file_dir):
        continue

    if not (count_every_file or file_dir.endswith(wanted_suffixes)):
        continue

    try:
        # The context manager closes the file even when reading fails part-way.
        with open(file_dir, "r") as file:
            # Blank lines (a lone newline) are not counted.
            local_count = sum(1 for line in file if line != "\n")
    except (OSError, UnicodeDecodeError):
        # Unreadable or undecodable file: record it and carry on.
        file_err_list.append(file_dir)
        continue
    print("({:.1f}%)".format(100*i/len_files), file_dir, "|", local_count)
    line_count += local_count
print("=====================================")
print("File Count Errors:", len(file_err_list))
if not skip_file_error_list:
    # One failed path per line.
    for failed_path in file_err_list:
        print(failed_path)

print("=====================================")
print("Total lines |", line_count)

There’s probably faster and more efficient ways to do this, but this is a nice start.

Variable Information

directory is the project directory you want to be counted

directory_depth is how deep within the project directory structure the script will scan,
i.e. a depth of 3 means it will only scan the following levels:

  • project_dir
    • sub_dir
      • sub2_dir

extensions_to_consider is the file extensions to count code. If you only want to count .py files, you set extensions_to_consider = [".py"]

exclude_filenames is an array of file names (and directories) you don’t want to consider the script to count code for.

skip_file_error_list is a boolean variable. If you wish to see a printout of all the files that could not be read while counting, set it to False. Otherwise set it to True.

How To Run

Run the script using the Python interpreter.
To run in terminal

python path_to_file.py

or

python3 path_to_file.py

Answered By: Connor White

Here’s another one, using pathlib. Lists individual (relative) file paths with line count, total number of files, and total line count.

import pathlib


class LoC(object):
    """Count lines in source files below a directory and print a report.

    After ``count`` returns, ``files`` holds the number of files counted and
    ``lines`` their combined line count.
    """
    # File suffixes that are counted.
    suffixes = ['.py']
    # File or directory names that are skipped wherever they occur.
    skip = ['name of dir or file to skip', ...]

    def count(self, path, init=True):
        """Count lines under ``path``, printing one row per counted file.

        Args:
            path: File or directory to examine.
            init: True for the top-level call. It resets the counters, records
                ``path`` as the root that printed paths are relative to, and
                prints the summary at the end. Recursive calls pass False.
        """
        path = pathlib.Path(path)
        # Set the root and counters before the skip check, which formats the
        # path relative to ``self.root``. Doing it afterwards raised
        # AttributeError when the top-level path itself was in ``skip``.
        if init:
            self.root = path
            self.files = 0
            self.lines = 0
        if path.name in self.skip:
            print(f'skipped: {path.relative_to(self.root)}')
            return
        if path.is_dir():
            # recursive case
            for item in path.iterdir():
                self.count(path=item, init=False)
        elif path.is_file() and path.suffix in self.suffixes:
            # base case
            with path.open(mode='r') as f:
                line_count = sum(1 for _ in f)
            print(f'{path.relative_to(self.root)}: {line_count}')
            self.files += 1
            self.lines += line_count
        if init:
            # A blank line separates the per-file rows from the summary.
            print(f'\n{self.lines} lines in {self.files} files')

Note I omitted the __init__ method for clarity.

Usage example:

loc = LoC()
loc.count('/path/to/your/project/directory')
Answered By: djvg

Use radon

python3 -mpip install radon

radon raw -s pkg_dir/

** Total **
    LOC: 2994
    LLOC: 1768
    SLOC: 1739
    Comments: 71
    Single comments: 29
    Multi: 818
    Blank: 408
    - Comment Stats
        (C % L): 2%
        (C % S): 4%
        (C + M % L): 30%

it will also calculate cyclomatic complexity

a@debian:~/build/clean/scte35-threefive$ radon cc  -a threefive
threefive/base.py
    M 61:4 SCTE35Base.kv_clean - A
    M 85:4 SCTE35Base.load - A
    M 95:4 SCTE35Base._chk_var - A
    C 9:0 SCTE35Base - A
    M 34:4 SCTE35Base.as_hms - A
    M 79:4 SCTE35Base._chk_nbin - A
    M 17:4 SCTE35Base.__repr__ - A
    M 20:4 SCTE35Base.as_90k - A
    M 27:4 SCTE35Base.as_ticks - A
    M 48:4 SCTE35Base.get - A
    M 54:4 SCTE35Base.get_json - A
threefive/bitn.py
    C 9:0 BitBin - A
    M 30:4 BitBin.as_int - A
    M 47:4 BitBin.as_charset - A
    C 99:0 NBin - A
    M 133:4 NBin.add_int - A
    M 170:4 NBin.reserve - A

      ..... 
246 blocks (classes, functions, methods) analyzed.
Average complexity: A (1.9024390243902438)
Answered By: Leroy Scandal

I just did a variant of @Bryce93 ‘s response for a python + flask project(s)… ran some pivot tables on the outcome .csv file and the like (I manually marked files as ‘active’ downstream)… cheers

import os
import pandas as pd


def countlines(start, begin_start=None):
    """Collect per-file statistics for ``.py`` and ``.html`` files under ``start``.

    One dict per file is appended to the module-level list ``files``. The
    module-level ``repo`` string is recorded in each dict and is used to
    derive the file's folder relative to the repository.

    Args:
        start: Directory to scan; sub-directories are scanned recursively.
        begin_start: Not used by the function body; kept so existing callers
            and the recursive call keep working.
    """
    subdirs = []
    for name in os.listdir(start):
        thing = os.path.join(start, name)
        if os.path.isdir(thing):
            # Recurse only after this directory's own files are recorded.
            subdirs.append(thing)
            continue
        if not (os.path.isfile(thing) and thing.endswith(('.py', '.html'))):
            continue

        with open(thing, 'r') as f:
            stripped = [l.strip() for l in f]

        # 'lines' is every line that is not a '#' comment, for both languages.
        count = len([s for s in stripped if not s.startswith('#')])
        functions, classes = 0, 0
        if thing.endswith('.py'):
            # Look for the opening parenthesis rather than a trailing '):',
            # so annotated returns (``-> T:``), signatures spanning several
            # lines and ``async def`` are counted too.
            functions = len([
                s for s in stripped
                if s.startswith(('def ', 'async def ')) and '(' in s
            ])
            # ``class Foo:`` has no parentheses, so only require the colon.
            classes = len([
                s for s in stripped
                if s.startswith('class ') and s.endswith(':')
            ])
            comments = len([s for s in stripped if s.startswith('#')])
            language = 'python'
        else:
            # Only '.html' can reach this branch because of the filter above.
            comments = len([s for s in stripped if s.startswith('<!--')])
            language = 'jinja'

        path = str(thing)
        # Normalise separators so the '/' split also works where os.sep
        # is not '/'.
        relative = path.split(repo)[-1].replace(os.sep, '/')
        folder = '/'.join(relative.split('/')[:-1])

        files.append({
            'path': path,
            'repo': repo,
            'language': language,
            'filetype': thing.split('.')[-1],
            'folder': folder,
            'filename': os.path.basename(thing),
            'lines': count,
            'functions': functions,
            'classes': classes,
            'comments': comments,
        })

    for subdir in subdirs:
        countlines(subdir, begin_start=start)


# Gather statistics for each repository in turn. countlines() appends to the
# module-level `files` list and reads the module-level `repo`, so both are
# rebound before every call.
files = []
repo = '<repo1>'
countlines('<path>/<repo1>')
master = pd.DataFrame(files)
files = []
repo = '<repo2>'
countlines('<path>/<repo2>')
master = pd.concat([master, pd.DataFrame(files)], ignore_index=False, sort=False)
master['active'] = False
# sort_values returns a new DataFrame. The result must be assigned, or the
# CSV below is written in the original, unsorted order.
master = master.sort_values(by=['repo', 'folder', 'language', 'filename'])

master.to_csv('../<blah>.csv')
Answered By: Chris
Categories: questions Tags: ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.