diff --git a/Project.toml b/Project.toml
index 3901b19..5879531 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "OMETIFF"
 uuid = "2d0ec36b-e807-5756-994b-45af29551fcf"
 authors = ["Tamas Nagy "]
-version = "0.3.2"
+version = "0.3.3"
 
 [deps]
 AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
diff --git a/README.md b/README.md
index 0720927..5c38f99 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,8 @@ labeled axes provided by [AxisArrays.jl](https://github.com/JuliaImages/AxisArra
 ## Features
 
-- Can open a wide-range of OMETIFF files with a special focus on [correctness](https://github.com/tlnagy/OMETIFF.jl/blob/master/test/runtests.jl)
+- Can open a wide range of OMETIFF files with a special focus on [correctness](https://github.com/tlnagy/OMETIFF.jl/blob/master/test/runtests.jl)
+- Supports memory-mapping to open large TIFF files quickly even on
+  memory-constrained machines
 - Spatial and temporal axes are annotated with units if available (like μm, s, etc)
 - Channel and position axes use their original names
 - Elapsed times are extracted and returned using the same labeled axes
diff --git a/docs/src/index.md b/docs/src/index.md
index de794fa..c7dba0e 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -6,7 +6,10 @@ labeled axes provided by [AxisArrays.jl](https://github.com/JuliaImages/AxisArra
 
 ## Features
 
-- Can open a wide-range of OMETIFF files with a special focus on [correctness](https://github.com/tlnagy/OMETIFF.jl/blob/master/test/runtests.jl)
+- Can open a wide range of OMETIFF files with a special focus on
+  [correctness](https://github.com/tlnagy/OMETIFF.jl/blob/master/test/runtests.jl)
+- Supports memory-mapping to open large TIFF files quickly even on
+  memory-constrained machines
 - Spatial and temporal axes are annotated with units if available (like μm, s, etc)
 - Channel and position axes use their original names
 - Elapsed times are extracted and returned using the same labeled axes
diff --git a/docs/src/lib/internals.md b/docs/src/lib/internals.md
index a7dce85..b6d32d3 100644
--- a/docs/src/lib/internals.md
+++ b/docs/src/lib/internals.md
@@ -14,6 +14,7 @@ OMETIFF.dump_omexml
 ```@docs
 OMETIFF.IFD
 OMETIFF.TiffFile
+OMETIFF.ReadonlyTiffDiskArray
 ```
 
 ## Logic
diff --git a/src/OMETIFF.jl b/src/OMETIFF.jl
index b4088da..272de07 100644
--- a/src/OMETIFF.jl
+++ b/src/OMETIFF.jl
@@ -17,6 +17,7 @@ using DocStringExtensions
 include("utils.jl")
 include("files.jl")
 include("parsing.jl")
+include("mmap.jl")
 include("loader.jl")
 
 end # module
diff --git a/src/files.jl b/src/files.jl
index 43e602d..66a2d63 100644
--- a/src/files.jl
+++ b/src/files.jl
@@ -13,7 +13,7 @@ mutable struct TiffFile
     filepath::String
 
     """The file stream"""
-    io::Union{Stream, IOStream}
+    io::Stream
 
     """Location of the first IFD in the file stream"""
     first_offset::Int
@@ -21,7 +21,7 @@ mutable struct TiffFile
     """Whether this file has a different endianness than the host computer"""
     need_bswap::Bool
 
-    function TiffFile(io::Union{Stream, IOStream})
+    function TiffFile(io::Stream)
         file = new()
         file.io = io
         seekstart(io)
@@ -47,6 +47,8 @@ function TiffFile(uuid::String, filepath::String)
     end
 end
 
+TiffFile(io::IOStream) = TiffFile(Stream(format"OMETIFF", io, extract_filename(io)))
+
 """
     IFD(file, strip_offsets) -> IFD
 
@@ -297,6 +299,32 @@ function load_comments(file)
     metadata["Summary"]
 end
 
+"""
+    _read_ifd_data!(ifd, target, buffer)
+
+Reads the IFD `ifd` into `target` using a temporary buffer `buffer`. If the IFD
+is stripped, `buffer` must be a 1-dimensional array that holds a single strip;
+strip `j` is then copied into row `j` of `target`. Otherwise, `buffer` must be a
+2-dimensional array: the data is read into `buffer` only and the caller is
+responsible for transferring it into `target`.
+"""
+function _read_ifd_data!(ifd::IFD, target::AbstractArray{T, 2}, buffer::AbstractArray{T, 1}) where {T}
+    n_strips = length(ifd.strip_offsets)
+
+    for j in 1:n_strips
+        seek(ifd.file.io, ifd.strip_offsets[j])
+        read!(ifd.file.io, buffer)
+        do_bswap(ifd.file, buffer)
+        view(target, j, :) .= buffer
+    end
+end
+
+function _read_ifd_data!(ifd::IFD, target::AbstractArray{T, 2}, buffer::AbstractArray{T, 2}) where {T}
+    seek(ifd.file.io, first(ifd.strip_offsets))
+    read!(ifd.file.io, buffer)
+    do_bswap(ifd.file, buffer)
+end
+
 """
     do_bswap(file, values) -> Array
 
diff --git a/src/loader.jl b/src/loader.jl
index a38acf2..c043030 100644
--- a/src/loader.jl
+++ b/src/loader.jl
@@ -1,16 +1,31 @@
-function load(f::File{format"OMETIFF"}; dropunused=true)
+function load(f::File{format"OMETIFF"}; dropunused=true, inmemory=true)
     open(f) do s
-        ret = load(s; dropunused=dropunused)
+        load(s; dropunused=dropunused, inmemory=inmemory)
     end
 end
 
 """
-    load(io; dropunused) -> ImageMetadata.ImageMeta
+    load(io; dropunused, inmemory) -> ImageMetadata.ImageMeta
 
-Load an OMETIFF file using the stream `io`. `dropunused` controls whether
-dimensions of length 1 are dropped automatically (default) or not.
+Load an OMETIFF file using the stream `io`.
+
+**Arguments**
+- `dropunused::Bool`: controls whether dimensions of length 1 are dropped
+  automatically (default) or not.
+- `inmemory::Bool`: controls whether arrays are fully loaded into memory
+  (default) or left on disk and specific parts only loaded when accessed.
+
+!!! tip
+    The `inmemory=false` flag currently returns a read-only view of the data on
+    the disk for data integrity reasons. In order to modify the contents, you
+    must copy the data into an in-memory container--at least until
+    [#52](https://github.com/tlnagy/OMETIFF.jl/issues/52) is fixed--like so:
+
+    ```
+    copy(arr)
+    ```
 """
-function load(io::Stream{format"OMETIFF"}; dropunused=true)
-    if io.filename != nothing && !occursin(".ome.tif", io.filename)
+function load(io::Stream{format"OMETIFF"}; dropunused=true, inmemory=true)
+    if io.filename !== nothing && !occursin(".ome.tif", io.filename)
         throw(FileIO.LoaderError("OMETIFF", "Not an OME TIFF file!"))
     end
@@ -83,7 +98,11 @@ function load(io::Stream{format"OMETIFF"}; dropunused=true)
 
     elapsed_times = get_elapsed_times(containers, master_dims, masteraxis)
 
-    img = inmemoryarray(ifds, master_dims, master_rawtype, mappedtype)
+    if inmemory
+        img = inmemoryarray(ifds, master_dims, master_rawtype, mappedtype)
+    else
+        img = ReadonlyTiffDiskArray(Gray{mappedtype}, master_rawtype, ifds, values(master_dims))
+    end
 
     # find dimensions of length 1 and remove them
     if dropunused
@@ -121,29 +140,23 @@ function inmemoryarray(ifds::OrderedDict{NTuple{4, Int}, IFD},
 
     # iterate over each IFD
     for (indices, ifd) in ifds
-
         n_strips = length(ifd.strip_offsets)
         strip_len = floor(Int, (width * height) / n_strips)
 
         # if the data is stripped and we haven't fix tmp's layout then lets make
-        # tmp equal to one strip.
+        # tmp equal to one strip. This'll be fixed in Julia 1.4
         if n_strips > 1 && size(tmp) != (strip_len, )
             tmp = Array{rawtype}(undef, strip_len)
         end
 
-        for j in 1:n_strips
-            seek(ifd.file.io, ifd.strip_offsets[j])
-            read!(ifd.file.io, tmp)
-            do_bswap(ifd.file, tmp)
-            if n_strips > 1
-                data[j, :, indices...] = tmp
-            else
-                data[:, :, indices...] = tmp'
-            end
+        target = view(data, :, :, indices...)
+        _read_ifd_data!(ifd, target, tmp)
+
+        # transposition must happen here since the on-disk variant does this on access
+        if ndims(tmp) == 2
+            target .= tmp'
         end
     end
 
     reinterpret(Gray{mappedtype}, data)
 end
-
-
diff --git a/src/mmap.jl b/src/mmap.jl
new file mode 100644
index 0000000..03d5a5f
--- /dev/null
+++ b/src/mmap.jl
@@ -0,0 +1,89 @@
+"""
+    ReadonlyTiffDiskArray(mappedtype, rawtype, ifds, dims) -> ReadonlyTiffDiskArray
+
+A lazy representation of an OMETIFF file. This custom type is needed since TIFF
+files are laid out noncontiguously and nonregularly. It uses an internal index
+to determine the mapping from indices to the locations of data slices on disk.
+These slices are generally XY slices and are usually accessed all at once, so
+the most recently read slice is kept in an internal cache to speed up the
+process. Externally, this type should behave very similarly to an in-memory
+array, albeit with a higher cost of accessing an element.
+
+$(FIELDS)
+"""
+mutable struct ReadonlyTiffDiskArray{T <: Gray, R, N1, N2} <: AbstractArray{T, N2}
+    """
+    A map of dimensions (sans XY) to the corresponding [`IFD`](@ref)
+    """
+    ifds::OrderedDict{NTuple{N1, Int}, IFD}
+
+    """
+    The full set of dimensions of the TIFF file, including XY
+    """
+    dims::NTuple{N2, Int}
+
+    """
+    An internal cache to fill when reading from disk. It is allocated as
+    `dims[2]` x `dims[1]` because it is indexed as `cache[i2, i1]`.
+    """
+    cache::Array{R, 2}
+
+    """
+    The dimension indices corresponding to the slice currently in the cache
+    """
+    cache_index::NTuple{N1, Int}
+
+    function ReadonlyTiffDiskArray(::Type{T}, ::Type{R}, ifds::OrderedDict{NTuple{N1, Int}, IFD}, dims::NTuple{N2, Int}) where {T, R, N1, N2}
+        if N2 - 2 != N1
+            error("$N2 dimensions given, but the IFDs are indexed on $N1 dimensions instead of "*
+                  "expected $(N2-2).")
+        end
+        # `getindex` reads `cache[i2, i1]`, so the cache must be `dims[2]` x `dims[1]`
+        # to stay in bounds for non-square images. The sentinel index is sized from
+        # `N1` rather than assuming exactly four non-XY dimensions.
+        new{T, R, N1, N2}(ifds, dims, Array{R}(undef, dims[2], dims[1]),
+                          ntuple(_ -> -1, Val(N1)))
+    end
+end
+
+Base.size(A::ReadonlyTiffDiskArray) = A.dims
+
+function Base.getindex(A::ReadonlyTiffDiskArray{Gray{T}, R, N1, N2}, i1::Int, i2::Int, i::Vararg{Int, N1}) where {T, R, N1, N2}
+    # check the loaded cache is already the correct slice
+    if A.cache_index == i
+        return Gray(reinterpret(T, A.cache[i2, i1]))
+    end
+
+    ifd = A.ifds[i]
+
+    # if the file isn't open, lets open a handle and update it
+    if !isopen(ifd.file.io)
+        path = ifd.file.filepath
+        ifd.file.io = Stream(format"OMETIFF", open(path), path)
+    end
+
+    n_strips = length(ifd.strip_offsets)
+
+    if n_strips > 1
+        # stripped data has to go through a 1D buffer holding a single strip.
+        # Strip `j` fills row `j` of the target, so hand over a transposed
+        # wrapper to keep the `cache[i2, i1]` layout. The buffer should be
+        # replaced with a view of cache in Julia >1.4, see
+        # https://github.com/JuliaLang/julia/pull/33046
+        tmp = Array{R}(undef, div(length(A.cache), n_strips))
+        _read_ifd_data!(ifd, PermutedDimsArray(A.cache, (2, 1)), tmp)
+    else
+        # with a single strip the cache doubles as the read buffer
+        _read_ifd_data!(ifd, A.cache, A.cache)
+    end
+
+    A.cache_index = i
+
+    return Gray(reinterpret(T, A.cache[i2, i1]))
+end
+
+function Base.setindex!(A::ReadonlyTiffDiskArray{Gray{T}, R, N1, N2}, X, I...) where {T, R, N1, N2}
+    error("This array is on disk and is read only. Convert to a mutable in-memory version by running "*
+          "`copy(arr)`. \n\n𝗡𝗼𝘁𝗲: For large files this can be quite expensive. A future PR will add "*
+          "support for reading and writing to/from disk.")
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 7072526..76c0c22 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -155,6 +155,17 @@ end
             @test size(img) == (24, 18, 1, 1, 5, 1)
         end
     end
+    @testset "Memory mapping" begin
+        open(joinpath("testdata", "singles", "181003_multi_pos_time_course_1_MMStack.ome.tif")) do f
+            s = Stream(format"OMETIFF", f, OMETIFF.extract_filename(f))
+            img = OMETIFF.load(s, inmemory=false)
+            img2 = OMETIFF.load(s)
+            @test size(img) == (256, 256, 10, 2)
+            @test all(img[1:10,1,1,1] .== img2[1:10,1,1,1])
+            # file is read only and should throw an error if you try and modify it
+            @test_throws ErrorException img[1:10,1,1,1] .= 1.0
+        end
+    end
 end
 
 @testset "Error checks" begin