Getting the latest git commit id from URL on internal git using Python

Question:

With the following function I extract the latest git commit id in its short form and write it to a text file.

from os.path import exists
from subprocess import Popen, PIPE

def get_git_commit_id(txt_file: str) -> str:
    """
    Gets the latest git commit id in short form and stores it in the txt file.

    Runs ``git rev-parse --short HEAD`` in the current working directory.
    When git cannot be started, exits with an error (for example because the
    directory is not a repository) or prints nothing, the placeholder
    ``"latest"`` is used instead. The file is always (re)written and contains
    only the id, without a trailing newline.
    Parameters:
    ==========
    :param txt_file: name of the txt file
    Returns:
    ==========
    :return: the latest git commit id, or "latest" if it could not be read
    """
    if not exists(txt_file):
        print(f"'{txt_file}' did not exist before")  # for logging

    git_commit_id = ""
    try:
        # Pass the command as an argument list: a single command string
        # without a shell is treated as the program name on POSIX, so git
        # would never be found there.
        process = Popen(
            ["git", "rev-parse", "--short", "HEAD"], stdout=PIPE, stderr=PIPE
        )
        stdout, stderr = process.communicate()
    except OSError as error:
        # git is not installed or could not be executed
        print("It could not read the .git", error)  # for logging
    else:
        if process.returncode == 0:
            git_commit_id = stdout.decode("utf-8").strip()
        else:
            # git ran but failed; report what it wrote to stderr
            reason = stderr.decode("utf-8", errors="replace").strip()
            print("It could not read the .git", reason)  # for logging

    if not git_commit_id:
        git_commit_id = "latest"

    with open(txt_file, "w", encoding="utf8") as file:
        file.write(git_commit_id)
    return git_commit_id

# Example call: stores the id in "git_commit_id.txt" (relative to the current
# working directory); the returned id is discarded here.
get_git_commit_id("git_commit_id.txt")

However, this code only works when the .git directory is inside my project.

How can I extract the latest git commit id in its short form from the URL where my project is hosted on the internal git server?


References:

Asked By: Taras

||

Answers:

First, install GitPython via pip as follows: pip install gitpython

Then:

import git

repo_url = 'https://your-repository-url.com/your-repo.git'

# Clone the repository. depth=1 requests a shallow clone so that only the
# newest commit is downloaded. NOTE: the clone is expected to fail if the
# destination already exists and is not empty.
repo = git.Repo.clone_from(repo_url, '/path/to/local/repo', depth=1)

# Get the latest commit, i.e. the one HEAD points at in the fresh clone
latest_commit = repo.head.commit

# Print the commit id in its short form, as the question asks; this runs
# `git rev-parse --short <sha>` in the clone instead of printing the full
# 40-character hexsha.
print(repo.git.rev_parse(latest_commit.hexsha, short=True))

Answered By: Hakan Çelik

With the following code I was able to get the desired output:

# imports
from os import devnull
from os.path import exists
from logging import getLogger
from urllib.parse import parse_qsl
from urllib.request import urlopen
from ssl import create_default_context
from xml.dom.minidom import parseString
from subprocess import call, Popen, STDOUT, PIPE

# Module-level logger used by get_git_commit_id for its diagnostics.
logger = getLogger(__name__)

# Placeholder address of the project's summary page on the internal git web
# interface; get_git_commit_id downloads and parses this page.
project_git_url = "https://path/to/your/git.com/?p=your-project;a=summary"
# Passed as ``cafile`` to ssl.create_default_context(); None makes ssl fall
# back to the system's default CA certificates.
cert = None

def _local_commit_id() -> str:
    """Return the short id of HEAD from the local repository, or "" if unavailable."""
    try:
        # An argument list is required: a single command string without a
        # shell is treated as the program name on POSIX.
        process = Popen(
            ["git", "rev-parse", "--short", "HEAD"], stdout=PIPE, stderr=PIPE
        )
        stdout, _ = process.communicate()
    except OSError:
        # git is not installed or cannot be executed
        return ""
    if process.returncode != 0:
        # e.g. the working directory is not inside a git repository
        return ""
    return stdout.decode("utf-8").strip()


def _remote_commit_id(summary_url: str, cafile) -> str:
    """Return the short commit id linked from the summary page, or "" if none is found."""
    context = create_default_context(cafile=cafile)
    with urlopen(summary_url, context=context) as response:
        page = response.read().decode("utf-8")

    # The page is parsed as XML, so it has to be well-formed (XHTML).
    document = parseString(page)
    for div_element in document.getElementsByTagName("div"):
        attribute_values = [value for _, value in div_element.attributes.items()]
        if "page_nav" not in attribute_values:
            continue
        for anchor in div_element.getElementsByTagName("a"):
            href_content = anchor.getAttribute("href")
            if "a=commit;" not in href_content:
                continue
            # Only the part after "?" holds the ";"-separated parameters.
            query = href_content.partition("?")[2] or href_content
            parameters = dict(parse_qsl(query, encoding="utf-8", separator=";"))
            commit_hash = parameters.get("h", "")
            if commit_hash:
                # Six characters are kept, as in the original implementation.
                # NOTE(review): `git rev-parse --short` usually prints seven.
                return commit_hash[:6]
    return ""


def get_git_commit_id(txt_file: str) -> str:
    """
    Gets the latest git commit id in short form and stores it in the txt file.

    The id is taken from the local repository (``git rev-parse --short HEAD``).
    If that is not possible, the summary page at ``project_git_url`` is
    downloaded and the hash of its "commit" navigation link is used. If both
    sources fail, the placeholder ``"latest"`` is stored. The file is always
    (re)written and contains only the id, without a trailing newline.
    Parameters:
    ==========
    :param txt_file: name of the txt file
    Returns:
    ==========
    :return: the latest git commit id, or "latest" if it could not be determined
    """
    if not exists(txt_file):
        logger.info(
            "The following text file does not exist : '%s'. Now it was created.",
            txt_file,
        )

    git_commit_id = _local_commit_id()

    if not git_commit_id:
        logger.debug(
            "It could not read the .git repository to extract the latest git commit id."
        )
        try:
            git_commit_id = _remote_commit_id(project_git_url, cert)
        except Exception as error:
            # Best-effort fallback: network, TLS and parsing problems must not
            # abort the caller, they only lead to the "latest" placeholder.
            logger.debug(
                "It could not get the latest git commit id from the git : %s.\n"
                "The following error occurred : %s",
                project_git_url,
                error,
            )

    if not git_commit_id:
        git_commit_id = "latest"

    with open(txt_file, "w", encoding="utf8") as file:
        file.write(git_commit_id)
    return git_commit_id

It also includes parsing the web page at the URL where the internal git repository is located.


References:

Answered By: Taras
Categories: questions Tags: , , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.