parse LE/BE indicator bytes

2025-12-09 01:31:47 +01:00
parent 30f5a8d6a1
commit 626218b7fa
6 changed files with 43 additions and 38 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -177,3 +177,4 @@ cython_debug/

 # ...
 *.ARW
+*.JPG
--- a/README.md
+++ b/README.md
@@ -2,8 +2,10 @@

 QIFDP - Quick IFD Parser

-Tested with:
- Sony ARW v4.0.1
+Instead of parsing and processing entire IFD sections, this library searches for a single given IFD tag and parses only that tag, which saves a lot of time. It is intended for Python programs that process large numbers of images, where every time improvement counts.
+
+# Tested formats
+- Sony ARW v4.01

 # Build

--- a/examples/read_ifd_tags.py
+++ b/examples/read_ifd_tags.py
@@ -8,7 +8,7 @@ date = get_raw_ifd_tag(file_path=file_path)
 print(date)

 # Get the camera brand name
-make = get_raw_ifd_tag(file_path=file_path, tag_bytes=IFDTagMap.Make)
+make = get_raw_ifd_tag(file_path=file_path, ifd_tag=IFDTagMap.Make)
 print(make)

 # If the buffer from 0x80000 (512k) isn't enough for parsing the IFD tag,
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "qifdp"
-version = "0.1.0"
-description = "Add your description here"
+version = "25.12.9"
+description = "A Python library to quickly parse IFD Tags"
 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = []
@@ -15,18 +15,3 @@ package-dir = {"" = "src"}

 [tool.setuptools.packages.find]
 where = ["src"]
-
-[metadata]
-name = "qidfp"
-version = "0.1.0"
-description = "A Python library to quickly parse IFD Tags"
-long_description = "file:README.md"
-long_description_content_type = "text/markdown"
-author = "DasMoorhuhn"
-author_email = "dasmoorhuhn@proton.me"
-license = "GPLv3.0"
-url = "https://github.com/yourusername/my-library"
-classifiers = [
-    "Programming Language :: Python :: 3",
-    "Operating System :: OS Independent",
-]
--- a/src/qifdp/main.py
+++ b/src/qifdp/main.py
@@ -1,4 +1,5 @@
 import struct
+from typing import Tuple


 class IFDTagMap:
@@ -41,7 +42,24 @@ class IFDTagMap:
    CameraOwnerName = b'\xC6\x0E'       # 0xC60E


-def get_raw_ifd_tag(file_path:str, read_buffer:int=0x80000, tag_bytes:bytes=IFDTagMap.DateTimeOriginal) -> str:
+def check_for_endian(data: bytes) -> Tuple[int, int]|None:
+    header = struct.unpack(">H", data[:2])[0]
+    offset = 0
+    # Check for JPEG Header. If present, go to TIFF.
+    if header == 0xFFD8:
+        raise ValueError("JPEG not supported yet.")
+        header = struct.unpack_from(">H", data, 12)[0]
+        offset = 12
+
+    if header == 0x4949:   # "II" LE
+        return 1, offset
+    if header == 0x4D4D:   # "MM" BE
+        return 2, offset
+
+    return None
+
+
+def get_raw_ifd_tag(file_path:str, read_buffer:int=0x80000, ifd_tag:bytes=IFDTagMap.DateTimeOriginal) -> str:
    with open(file_path, "rb") as f:
        f.seek(0, 2)
        size_bytes = f.tell()
@@ -49,15 +67,19 @@ def get_raw_ifd_tag(file_path:str, read_buffer:int=0x80000, tag_bytes:bytes=IFDT
        f.seek(0)
        data = f.read(read_buffer)

-    # Try first LE then BE
-    offset = data.find(tag_bytes[::-1])
+        # Parse header to check if IFD Tags are LE or BE
+        endian, tiff_offset = check_for_endian(data)
+        if endian == 1:
            endian = "<"
-    if offset == -1:
-        offset = data.find(tag_bytes)
+            ifd_tag = ifd_tag[::-1]
+        elif endian == 2:
            endian = ">"
+        else: return
+
+    offset = data[tiff_offset:].find(ifd_tag)
        
    if offset == -1:
-        raise ValueError("Tag 0x9003 not found in the file.")
+        raise ValueError(f"Tag {hex(ifd_tag)} not found in the file.")

    # Read type and count
    type_, count = struct.unpack(endian + "HI", data[offset+2:offset+8])
@@ -66,13 +88,8 @@ def get_raw_ifd_tag(file_path:str, read_buffer:int=0x80000, tag_bytes:bytes=IFDT
    value_or_offset = struct.unpack(endian + "I", data[offset+8:offset+12])[0]

    # If ASCII and count > 4, value_or_offset is the file offset of the string
+    # print(hex(offset))
    if type_ == 2 and count > 4:
-        value_offset = value_or_offset
-        value_bytes = data[value_offset:value_offset+count]
-        value = value_bytes.decode('ascii')
+        return data[value_or_offset:value_or_offset+count].decode('ascii')
    else:
-        # Small values stored inline
-        value_bytes = data[offset+8:offset+8+count]
-        value = value_bytes.decode('ascii')
-
-    return value
+        return data[offset+8:offset+8+count].decode('ascii')
--- a/uv.lock
+++ b/uv.lock
@@ -4,5 +4,5 @@ requires-python = ">=3.13"

 [[package]]
 name = "qifdp"
-version = "0.1.0"
+version = "25.12.9"
 source = { editable = "." }