Determining the latest version of a file (python)
Question:
This has me stumped…
I have a list of files in a folder. Eg.
myFiles = ["apple_d_v01.jpg", "apple_d_v02.jpg", "apple_d_v03.jpg", "something_d.jpg", "anotherthing_d.jpg"]
There are three versions of the file “apple_d”, using a version suffix of “_vxx”. I want to be able to modify the list to have only the latest version, so that
myFiles = ["apple_d_v03.jpg", "something_d.jpg", "anotherthing_d.jpg"]
Any ideas?
Thanks very much.
Edit: I came up with this this morning – it works fine for my purpose, but is a little different from the question I initially asked. Thanks all for helping out.
myFiles = ["apple_d.jpg", "apple_dm.jpg", "apple_d_v2.jpg", "apple_d_v3.jpg", "something_d.jpg", "anotherthing_d.jpg", "test2_s_v01", "test2_s_v02.jpg", "test2_s_v03.jpg", "test2_s_v04.jpg" ]
obj = "cube" #controlled by variable
suf = "d" #controlled by variable
ext = ".jpg" #controlled by variable


def _version_number(file_name):
    """Return N for a name ending in ``_vN`` (before the extension), else -1."""
    stem = file_name.rsplit(".", 1)[0]
    _, tag, digits = stem.rpartition("_v")
    return int(digits) if tag and digits.isdecimal() else -1


# Candidates are "<obj>_<suf>.<...>" and "<obj>_<suf>_<...>" names that end in
# the wanted extension. The list is built in one pass instead of removing
# entries while iterating, which silently skips elements.
_prefix = obj + "_" + suf
objVersions = [
    name
    for name in myFiles
    if name.endswith(ext) and name.startswith((_prefix + "_", _prefix + "."))
]
# Newest first; the numeric key ranks v10 above v9, and an unversioned file
# (key -1) ranks last.
objVersions = sorted(objVersions, key=_version_number, reverse=True)
# None when no file matches, rather than an IndexError.
chosenfile = objVersions[0] if objVersions else None
Answers:
Assuming that `d` is a version number in your question:
# Highest version number among names ending in "_v<digits>" (before the
# extension). Names without such a tag are skipped instead of being passed to
# int(); the result is None when no file is versioned.
latestVersion = max(
    (
        int(tag[1:])
        for tag in (fname.rsplit('.', 1)[0].rsplit("_", 1)[-1] for fname in myFiles)
        if tag.startswith('v') and tag[1:].isdecimal()
    ),
    default=None,
)
From your comments, I understand that you want to keep the latest versions of versioned files. For that, you’ll need this:
# Collect every unversioned file plus, for each versioned base name, only the
# file with the highest "_v<digits>" number.
answer = set()
newest = {}  # base name -> (version number, file name)
for fname in myFiles:
    stem = fname.rsplit('.', 1)[0]
    name, sep, version = stem.rpartition('_')
    if sep and version.startswith('v') and version[1:].isdecimal():  # this is a versioned file
        number = int(version[1:])
        # Group on the exact base name so that e.g. "apple_dm" never competes
        # with "apple_d"; the first file seen wins a tie.
        if name not in newest or number > newest[name][0]:
            newest[name] = (number, fname)
    else:
        answer.add(fname)
answer.update(fname for _, fname in newest.values())
This method, which I wrote, should do what you asked. It takes a list of file names and finds the latest version number, then searches for all files that contain a version tag and removes the ones that are not at the latest version. It will not work if some files have only reached version 2 while others are at version 3.
def _version_of(file_name):
    """Return N for a name ending in ``_vN`` (before the extension), else None."""
    stem = file_name.rsplit('.', 1)[0]
    _, tag, digits = stem.rpartition('_v')
    if tag and digits.isdecimal():
        return int(digits)
    return None


def removePreviousVersions(FileNameList):
    """Keep unversioned files plus the files at the highest version number.

    The highest version is taken across ALL files, so a file whose own newest
    version is lower than that maximum is dropped entirely.

    Args:
        FileNameList: file names; a name is versioned when it ends in
            ``_v<digits>`` before its extension (any extension, any padding).

    Returns:
        A new list, in input order, of the unversioned names and the names
        whose version equals the overall maximum.
    """
    versions = [_version_of(name) for name in FileNameList]
    numbered = [number for number in versions if number is not None]
    if not numbered:
        return list(FileNameList)
    latest = max(numbered)
    # Compare parsed numbers rather than searching for a zero-padded "_vNN"
    # substring, so "_v3" and "_v03" are treated alike.
    return [
        name
        for name, number in zip(FileNameList, versions)
        if number is None or number == latest
    ]
This example uses a similar method, but it grabs the latest version of each file, even if that version is older than the latest version of another file.
def _split_version(file_name):
    """Split ``base_vN.ext`` into (``base.ext``, N); return None if unversioned."""
    stem, dot, extension = file_name.rpartition('.')
    if not dot:
        # No extension at all: the whole name is the stem.
        stem, extension = file_name, ''
    base, tag, digits = stem.rpartition('_v')
    if tag and digits.isdecimal():
        return base + dot + extension, int(digits)
    return None


def removePreviousVersions(FileNameList):
    """Keep unversioned files and only the newest version of each versioned file.

    Args:
        FileNameList: file names; a name is versioned when it ends in
            ``_v<digits>`` before its extension.

    Returns:
        A new list in which each versioned file is represented once, by its
        highest-numbered version, at the position where that file first
        appeared. Unversioned names are kept as they are.
    """
    parsed = [_split_version(name) for name in FileNameList]

    # First pass: newest version per exact base name (not a substring match,
    # so "apple_d" and "pineapple_d" stay separate).
    newest = {}  # base name -> (version number, file name)
    for name, info in zip(FileNameList, parsed):
        if info is None:
            continue
        key, number = info
        if key not in newest or number > newest[key][0]:
            newest[key] = (number, name)

    # Second pass: emit results in the original order.
    ReturnList = []
    emitted = set()
    for name, info in zip(FileNameList, parsed):
        if info is None:
            ReturnList.append(name)
        elif info[0] not in emitted:
            emitted.add(info[0])
            ReturnList.append(newest[info[0]][1])
    return ReturnList
Example Input:
NameList = ["stupd_d_v01.jpg", "apple_d_v01.jpg", "apple_d_v02.jpg", "apple_d_v03.jpg", "something_d.jpg", "anotherthing_d.jpg"]
returns ["stupd_d_v01.jpg", "apple_d_v03.jpg", "something_d.jpg", "anotherthing_d.jpg"]
Assuming that the versions are always with the syntax of _v##, you can do this:
import re
# group 1: base name, group 2: optional "_v<digits>" tag ('' when absent),
# group 3: optional extension including its dot.
parts_re = re.compile(r'^(.+?)((?:_v\d+)?)(\.[^.]*)?$')


def remove_oldies(list):
    """Return the newest version of every file, in sorted order.

    Files are grouped by base name and extension. Within a group the highest
    ``_v<digits>`` number wins, and a file without a version tag counts as
    older than any versioned one.

    Args:
        list: iterable of file names (the parameter name is kept for
            compatibility even though it shadows the builtin).

    Returns:
        One file name per group, ordered as the groups appear in
        ``sorted(list)``. An empty input gives an empty list.
    """
    newest = {}  # (base name, extension) -> (version number, file name)
    for item in sorted(list):
        match = parts_re.match(item)
        if match is None:
            # Nothing to parse (e.g. an empty string): keep the item as its own group.
            key, number = (item, ''), -1
        else:
            name, version, extension = match.groups()
            key = (name, extension or '')
            # Compare numerically so that _v10 beats _v9.
            number = int(version[2:]) if version else -1
        if key not in newest or number > newest[key][0]:
            newest[key] = (number, item)
    return [item for _, item in newest.values()]
remove_oldies(myFiles)
A short (and functional) answer would be something like:
# One (basename, version, full filename) tuple per file; a name without a "_vNN" tag gets version 0.
# The pattern is a raw string with the dot escaped so that only a literal ".jpg" matches.
files= [ (f.split("_d")[0],int("0"+re.search(r"((_v)?([0-9]+|))\.jpg",f.split("_d")[1]).group(3)),f) for f in myFiles]
# For every distinct basename, keep the full filename whose version equals that basename's maximum.
result= [ [ f[2] for f in files if f[0] == fn and f[1] == max( [ f[1] for f in files if f[0] == fn ] ) ][0] for fn in set( f[0] for f in files ) ]
With a bit of expansion and comments added:
# List of tuples of the form ('apple', 2, 'apple_d_v02.jpg') and ('something', 0, 'something_d.jpg');
# a name without a "_vNN" tag gets version 0. The pattern is a raw string with the dot escaped so
# that only a literal ".jpg" matches.
files= [ (f.split("_d")[0],int("0"+re.search(r"((_v)?([0-9]+|))\.jpg",f.split("_d")[1]).group(3)),f) for f in myFiles]
basename= 0 # index of basename (apple, something, etc) in each tuple inside "files"
version= 1 # index of version in each tuple inside "files"
fullname= 2 # index of full filename in each tuple inside "files"
# For every distinct basename, keep the full filename whose version equals that basename's maximum.
result= [ [ f[fullname] for f in files if f[basename] == current_basename and f[version] == max( [ f[version] for f in files if f[basename] == current_basename ] ) ][0] for current_basename in set( f[basename] for f in files ) ]
Last line can be further expanded into:
def max_version_fullname(current_basename):
    """Return the full filename with the highest version for *current_basename*.

    Reads the module-level ``files`` tuples through the ``basename``,
    ``version`` and ``fullname`` index constants. When several entries share
    the highest version, the first one in ``files`` is returned.
    """
    candidates = [entry for entry in files if entry[basename] == current_basename]
    highest = max([entry[version] for entry in candidates])
    winners = [entry[fullname] for entry in candidates if entry[version] == highest]
    return winners[0]


# Distinct basenames, then one winning filename per basename.
basenames = {entry[basename] for entry in files}
result = list(map(max_version_fullname, basenames))
This is my answer; I hope it helps you:
from distutils.version import LooseVersion
files= ["apple_d.jpg", "apple_dm.jpg", "apple_d_v2.jpg", "apple_d_v3.jpg", "something_d.jpg", "anotherthing_d.jpg", "test2_s_v01", "test2_s_v02.jpg", "test2_s_v03.jpg", "test2_s_v04.jpg" ]


def _version_number(file_name, prefix='apple_d'):
    """Return the version of ``<prefix>[_vN][.ext]``, or None for any other file.

    The unversioned ``<prefix>.ext`` counts as version 0.
    """
    stem = file_name.rsplit('.', 1)[0]
    if stem == prefix:
        return 0
    head, tag, digits = stem.rpartition('_v')
    if tag and head == prefix and digits.isdecimal():
        return int(digits)
    return None


# listA: every version of 'apple_d' (exact base name, so 'apple_dm.jpg' is excluded).
listA = [f for f in files if _version_number(f) is not None]
# listB: all other files, plus the newest 'apple_d' appended below.
listB = [f for f in files if _version_number(f) is None]
# listC: numeric versions matching listA. Plain integers replace LooseVersion,
# which ranked 'apple_dm.jpg' above 'apple_d_v3.jpg'.
listC = [_version_number(f) for f in listA]
if listA:
    listB.append(max(listA, key=_version_number))
print(listB)
This has me stumped…
I have a list of files in a folder. Eg.
myFiles = ["apple_d_v01.jpg", "apple_d_v02.jpg", "apple_d_v03.jpg", "something_d.jpg", "anotherthing_d.jpg"]
There are three versions of the file “apple_d”, using a version suffix of “_vxx”. I want to be able to modify the list to have only the latest version, so that
myFiles = ["apple_d_v03.jpg", "something_d.jpg", "anotherthing_d.jpg"]
Any ideas?
Thanks very much.
Edit: I came up with this this morning – it works fine for my purpose, but is a little different from the question I initially asked. Thanks all for helping out.
myFiles = ["apple_d.jpg", "apple_dm.jpg", "apple_d_v2.jpg", "apple_d_v3.jpg", "something_d.jpg", "anotherthing_d.jpg", "test2_s_v01", "test2_s_v02.jpg", "test2_s_v03.jpg", "test2_s_v04.jpg" ]
obj = "cube" #controlled by variable
suf = "d" #controlled by variable
ext = ".jpg" #controlled by variable


def _version_number(file_name):
    """Return N for a name ending in ``_vN`` (before the extension), else -1."""
    stem = file_name.rsplit(".", 1)[0]
    _, tag, digits = stem.rpartition("_v")
    return int(digits) if tag and digits.isdecimal() else -1


# Candidates are "<obj>_<suf>.<...>" and "<obj>_<suf>_<...>" names that end in
# the wanted extension. The list is built in one pass instead of removing
# entries while iterating, which silently skips elements.
_prefix = obj + "_" + suf
objVersions = [
    name
    for name in myFiles
    if name.endswith(ext) and name.startswith((_prefix + "_", _prefix + "."))
]
# Newest first; the numeric key ranks v10 above v9, and an unversioned file
# (key -1) ranks last.
objVersions = sorted(objVersions, key=_version_number, reverse=True)
# None when no file matches, rather than an IndexError.
chosenfile = objVersions[0] if objVersions else None
Assuming that `d` is a version number in your question:
# Highest version number among names ending in "_v<digits>" (before the
# extension). Names without such a tag are skipped instead of being passed to
# int(); the result is None when no file is versioned.
latestVersion = max(
    (
        int(tag[1:])
        for tag in (fname.rsplit('.', 1)[0].rsplit("_", 1)[-1] for fname in myFiles)
        if tag.startswith('v') and tag[1:].isdecimal()
    ),
    default=None,
)
From your comments, I understand that you want to keep the latest versions of versioned files. For that, you’ll need this:
# Collect every unversioned file plus, for each versioned base name, only the
# file with the highest "_v<digits>" number.
answer = set()
newest = {}  # base name -> (version number, file name)
for fname in myFiles:
    stem = fname.rsplit('.', 1)[0]
    name, sep, version = stem.rpartition('_')
    if sep and version.startswith('v') and version[1:].isdecimal():  # this is a versioned file
        number = int(version[1:])
        # Group on the exact base name so that e.g. "apple_dm" never competes
        # with "apple_d"; the first file seen wins a tie.
        if name not in newest or number > newest[name][0]:
            newest[name] = (number, fname)
    else:
        answer.add(fname)
answer.update(fname for _, fname in newest.values())
This method, which I wrote, should do what you asked. It takes a list of file names and finds the latest version number, then searches for all files that contain a version tag and removes the ones that are not at the latest version. It will not work if some files have only reached version 2 while others are at version 3.
def _version_of(file_name):
    """Return N for a name ending in ``_vN`` (before the extension), else None."""
    stem = file_name.rsplit('.', 1)[0]
    _, tag, digits = stem.rpartition('_v')
    if tag and digits.isdecimal():
        return int(digits)
    return None


def removePreviousVersions(FileNameList):
    """Keep unversioned files plus the files at the highest version number.

    The highest version is taken across ALL files, so a file whose own newest
    version is lower than that maximum is dropped entirely.

    Args:
        FileNameList: file names; a name is versioned when it ends in
            ``_v<digits>`` before its extension (any extension, any padding).

    Returns:
        A new list, in input order, of the unversioned names and the names
        whose version equals the overall maximum.
    """
    versions = [_version_of(name) for name in FileNameList]
    numbered = [number for number in versions if number is not None]
    if not numbered:
        return list(FileNameList)
    latest = max(numbered)
    # Compare parsed numbers rather than searching for a zero-padded "_vNN"
    # substring, so "_v3" and "_v03" are treated alike.
    return [
        name
        for name, number in zip(FileNameList, versions)
        if number is None or number == latest
    ]
This example uses a similar method, but it grabs the latest version of each file, even if that version is older than the latest version of another file.
def _split_version(file_name):
    """Split ``base_vN.ext`` into (``base.ext``, N); return None if unversioned."""
    stem, dot, extension = file_name.rpartition('.')
    if not dot:
        # No extension at all: the whole name is the stem.
        stem, extension = file_name, ''
    base, tag, digits = stem.rpartition('_v')
    if tag and digits.isdecimal():
        return base + dot + extension, int(digits)
    return None


def removePreviousVersions(FileNameList):
    """Keep unversioned files and only the newest version of each versioned file.

    Args:
        FileNameList: file names; a name is versioned when it ends in
            ``_v<digits>`` before its extension.

    Returns:
        A new list in which each versioned file is represented once, by its
        highest-numbered version, at the position where that file first
        appeared. Unversioned names are kept as they are.
    """
    parsed = [_split_version(name) for name in FileNameList]

    # First pass: newest version per exact base name (not a substring match,
    # so "apple_d" and "pineapple_d" stay separate).
    newest = {}  # base name -> (version number, file name)
    for name, info in zip(FileNameList, parsed):
        if info is None:
            continue
        key, number = info
        if key not in newest or number > newest[key][0]:
            newest[key] = (number, name)

    # Second pass: emit results in the original order.
    ReturnList = []
    emitted = set()
    for name, info in zip(FileNameList, parsed):
        if info is None:
            ReturnList.append(name)
        elif info[0] not in emitted:
            emitted.add(info[0])
            ReturnList.append(newest[info[0]][1])
    return ReturnList
Example Input:
NameList = ["stupd_d_v01.jpg", "apple_d_v01.jpg", "apple_d_v02.jpg", "apple_d_v03.jpg", "something_d.jpg", "anotherthing_d.jpg"]
returns ["stupd_d_v01.jpg", "apple_d_v03.jpg", "something_d.jpg", "anotherthing_d.jpg"]
Assuming that the versions are always with the syntax of _v##, you can do this:
import re
# group 1: base name, group 2: optional "_v<digits>" tag ('' when absent),
# group 3: optional extension including its dot.
parts_re = re.compile(r'^(.+?)((?:_v\d+)?)(\.[^.]*)?$')


def remove_oldies(list):
    """Return the newest version of every file, in sorted order.

    Files are grouped by base name and extension. Within a group the highest
    ``_v<digits>`` number wins, and a file without a version tag counts as
    older than any versioned one.

    Args:
        list: iterable of file names (the parameter name is kept for
            compatibility even though it shadows the builtin).

    Returns:
        One file name per group, ordered as the groups appear in
        ``sorted(list)``. An empty input gives an empty list.
    """
    newest = {}  # (base name, extension) -> (version number, file name)
    for item in sorted(list):
        match = parts_re.match(item)
        if match is None:
            # Nothing to parse (e.g. an empty string): keep the item as its own group.
            key, number = (item, ''), -1
        else:
            name, version, extension = match.groups()
            key = (name, extension or '')
            # Compare numerically so that _v10 beats _v9.
            number = int(version[2:]) if version else -1
        if key not in newest or number > newest[key][0]:
            newest[key] = (number, item)
    return [item for _, item in newest.values()]
remove_oldies(myFiles)
A short (and functional) answer would be something like:
# One (basename, version, full filename) tuple per file; a name without a "_vNN" tag gets version 0.
# The pattern is a raw string with the dot escaped so that only a literal ".jpg" matches.
files= [ (f.split("_d")[0],int("0"+re.search(r"((_v)?([0-9]+|))\.jpg",f.split("_d")[1]).group(3)),f) for f in myFiles]
# For every distinct basename, keep the full filename whose version equals that basename's maximum.
result= [ [ f[2] for f in files if f[0] == fn and f[1] == max( [ f[1] for f in files if f[0] == fn ] ) ][0] for fn in set( f[0] for f in files ) ]
With a bit of expansion and comments added:
# List of tuples of the form ('apple', 2, 'apple_d_v02.jpg') and ('something', 0, 'something_d.jpg');
# a name without a "_vNN" tag gets version 0. The pattern is a raw string with the dot escaped so
# that only a literal ".jpg" matches.
files= [ (f.split("_d")[0],int("0"+re.search(r"((_v)?([0-9]+|))\.jpg",f.split("_d")[1]).group(3)),f) for f in myFiles]
basename= 0 # index of basename (apple, something, etc) in each tuple inside "files"
version= 1 # index of version in each tuple inside "files"
fullname= 2 # index of full filename in each tuple inside "files"
# For every distinct basename, keep the full filename whose version equals that basename's maximum.
result= [ [ f[fullname] for f in files if f[basename] == current_basename and f[version] == max( [ f[version] for f in files if f[basename] == current_basename ] ) ][0] for current_basename in set( f[basename] for f in files ) ]
Last line can be further expanded into:
def max_version_fullname(current_basename):
    """Return the full filename with the highest version for *current_basename*.

    Reads the module-level ``files`` tuples through the ``basename``,
    ``version`` and ``fullname`` index constants. When several entries share
    the highest version, the first one in ``files`` is returned.
    """
    candidates = [entry for entry in files if entry[basename] == current_basename]
    highest = max([entry[version] for entry in candidates])
    winners = [entry[fullname] for entry in candidates if entry[version] == highest]
    return winners[0]


# Distinct basenames, then one winning filename per basename.
basenames = {entry[basename] for entry in files}
result = list(map(max_version_fullname, basenames))
This is my answer; I hope it helps you:
from distutils.version import LooseVersion
files= ["apple_d.jpg", "apple_dm.jpg", "apple_d_v2.jpg", "apple_d_v3.jpg", "something_d.jpg", "anotherthing_d.jpg", "test2_s_v01", "test2_s_v02.jpg", "test2_s_v03.jpg", "test2_s_v04.jpg" ]


def _version_number(file_name, prefix='apple_d'):
    """Return the version of ``<prefix>[_vN][.ext]``, or None for any other file.

    The unversioned ``<prefix>.ext`` counts as version 0.
    """
    stem = file_name.rsplit('.', 1)[0]
    if stem == prefix:
        return 0
    head, tag, digits = stem.rpartition('_v')
    if tag and head == prefix and digits.isdecimal():
        return int(digits)
    return None


# listA: every version of 'apple_d' (exact base name, so 'apple_dm.jpg' is excluded).
listA = [f for f in files if _version_number(f) is not None]
# listB: all other files, plus the newest 'apple_d' appended below.
listB = [f for f in files if _version_number(f) is None]
# listC: numeric versions matching listA. Plain integers replace LooseVersion,
# which ranked 'apple_dm.jpg' above 'apple_d_v3.jpg'.
listC = [_version_number(f) for f in listA]
if listA:
    listB.append(max(listA, key=_version_number))
print(listB)