Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for memory mapping Tiff files #51

Merged
merged 6 commits into from
Dec 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "OMETIFF"
uuid = "2d0ec36b-e807-5756-994b-45af29551fcf"
authors = ["Tamas Nagy <[email protected]>"]
version = "0.3.2"
version = "0.3.3"

[deps]
AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ labeled axes provided by [AxisArrays.jl](https://github.com/JuliaImages/AxisArra
## Features

- Can open a wide range of OMETIFF files with a special focus on [correctness](https://github.com/tlnagy/OMETIFF.jl/blob/master/test/runtests.jl)
- Supports memory-mapping to open large TIFF files quickly even on
memory-constrained machines
- Spatial and temporal axes are annotated with units if available (like μm, s, etc)
- Channel and position axes use their original names
- Elapsed times are extracted and returned using the same labeled axes
Expand Down
5 changes: 4 additions & 1 deletion docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ labeled axes provided by [AxisArrays.jl](https://github.com/JuliaImages/AxisArra

## Features

- Can open a wide-range of OMETIFF files with a special focus on [correctness](https://github.com/tlnagy/OMETIFF.jl/blob/master/test/runtests.jl)
- Can open a wide range of OMETIFF files with a special focus on
[correctness](https://github.com/tlnagy/OMETIFF.jl/blob/master/test/runtests.jl)
- Supports memory-mapping to open large TIFF files quickly even on
memory-constrained machines
- Spatial and temporal axes are annotated with units if available (like μm, s, etc)
- Channel and position axes use their original names
- Elapsed times are extracted and returned using the same labeled axes
Expand Down
1 change: 1 addition & 0 deletions docs/src/lib/internals.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ OMETIFF.dump_omexml
```@docs
OMETIFF.IFD
OMETIFF.TiffFile
OMETIFF.ReadonlyTiffDiskArray
```

## Logic
Expand Down
1 change: 1 addition & 0 deletions src/OMETIFF.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ using DocStringExtensions
include("utils.jl")
include("files.jl")
include("parsing.jl")
include("mmap.jl")
include("loader.jl")

end # module
30 changes: 28 additions & 2 deletions src/files.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ mutable struct TiffFile
filepath::String

"""The file stream"""
io::Union{Stream, IOStream}
io::Stream

"""Location of the first IFD in the file stream"""
first_offset::Int

"""Whether this file has a different endianness than the host computer"""
need_bswap::Bool

function TiffFile(io::Union{Stream, IOStream})
function TiffFile(io::Stream)
file = new()
file.io = io
seekstart(io)
Expand All @@ -47,6 +47,8 @@ function TiffFile(uuid::String, filepath::String)
end
end

# Convenience constructor: wrap a raw `IOStream` in an OMETIFF-tagged `Stream`
# (named via `extract_filename`) and delegate to the `Stream` constructor.
function TiffFile(io::IOStream)
    stream = Stream(format"OMETIFF", io, extract_filename(io))
    return TiffFile(stream)
end

"""
IFD(file, strip_offsets) -> IFD

Expand Down Expand Up @@ -297,6 +299,30 @@ function load_comments(file)
metadata["Summary"]
end

"""
_read_ifd_data!(ifd, target, buffer)

Reads the IFD `ifd` into `target` using a temporary buffer `buffer`. If the IFD
is stripped, `buffer` must be a 1-dimensional array; otherwise, it should be the
same size as `target`.
"""
function _read_ifd_data!(ifd::IFD, target::AbstractArray{T, 2}, buffer::AbstractArray{T, 1}) where {T}
    # Striped layout: every strip is read into the 1-d `buffer` and then copied
    # into the row of `target` with the same index as the strip.
    tiff = ifd.file

    for row in 1:length(ifd.strip_offsets)
        # jump to the start of this strip and fill the buffer from the stream
        seek(tiff.io, ifd.strip_offsets[row])
        read!(tiff.io, buffer)
        # the return value is discarded, so `do_bswap` presumably swaps in
        # place when the file needs it -- TODO confirm against its definition
        do_bswap(tiff, buffer)
        target[row, :] .= buffer
    end
end

function _read_ifd_data!(ifd::IFD, target::AbstractArray{T, 2}, buffer::AbstractArray{T, 2}) where {T}
    # Single-read layout: the data is read in one go, starting at the first
    # strip offset, straight into the 2-d `buffer`. `target` is not written by
    # this method; callers are expected to pick the data up from `buffer`.
    stream = ifd.file.io
    seek(stream, first(ifd.strip_offsets))
    read!(stream, buffer)
    return do_bswap(ifd.file, buffer)
end

"""
do_bswap(file, values) -> Array

Expand Down
53 changes: 33 additions & 20 deletions src/loader.jl
Original file line number Diff line number Diff line change
@@ -1,16 +1,31 @@
function load(f::File{format"OMETIFF"}; dropunused=true)
function load(f::File{format"OMETIFF"}; dropunused=true, inmemory=true)
open(f) do s
ret = load(s; dropunused=dropunused)
ret = load(s; dropunused=dropunused, inmemory=inmemory)
end
end

"""
load(io; dropunused) -> ImageMetadata.ImageMeta
load(io; dropunused, inmemory) -> ImageMetadata.ImageMeta

Load an OMETIFF file using the stream `io`. `dropunused` controls whether
dimensions of length 1 are dropped automatically (default) or not.
Load an OMETIFF file using the stream `io`.

**Arguments**
- `dropunused::Bool`: controls whether dimensions of length 1 are dropped
automatically (default) or not.
- `inmemory::Bool`: controls whether arrays are fully loaded into memory
(default) or left on disk and specific parts only loaded when accessed.

!!! tip
The `inmemory=false` flag currently returns a read-only view of the data on
the disk for data integrity reasons. In order to modify the contents, you
must copy the data into an in-memory container--at least until
[#52](https://github.com/tlnagy/OMETIFF.jl/issues/52) is fixed--like so:

```
copy(arr)
```
"""
function load(io::Stream{format"OMETIFF"}; dropunused=true)
function load(io::Stream{format"OMETIFF"}; dropunused=true, inmemory=true)
if io.filename != nothing && !occursin(".ome.tif", io.filename)
throw(FileIO.LoaderError("OMETIFF", "Not an OME TIFF file!"))
end
Expand Down Expand Up @@ -83,7 +98,11 @@ function load(io::Stream{format"OMETIFF"}; dropunused=true)

elapsed_times = get_elapsed_times(containers, master_dims, masteraxis)

img = inmemoryarray(ifds, master_dims, master_rawtype, mappedtype)
if inmemory
img = inmemoryarray(ifds, master_dims, master_rawtype, mappedtype)
else
img = ReadonlyTiffDiskArray(Gray{mappedtype}, master_rawtype, ifds, values(master_dims));
end

# find dimensions of length 1 and remove them
if dropunused
Expand Down Expand Up @@ -121,29 +140,23 @@ function inmemoryarray(ifds::OrderedDict{NTuple{4, Int}, IFD},

# iterate over each IFD
for (indices, ifd) in ifds

n_strips = length(ifd.strip_offsets)
strip_len = floor(Int, (width * height) / n_strips)

# if the data is stripped and we haven't fix tmp's layout then lets make
# tmp equal to one strip.
# tmp equal to one strip. This'll be fixed in Julia 1.4
if n_strips > 1 && size(tmp) != (strip_len, )
tmp = Array{rawtype}(undef, strip_len)
end

for j in 1:n_strips
seek(ifd.file.io, ifd.strip_offsets[j])
read!(ifd.file.io, tmp)
do_bswap(ifd.file, tmp)
if n_strips > 1
data[j, :, indices...] = tmp
else
data[:, :, indices...] = tmp'
end
target = view(data, :, :, indices...)
_read_ifd_data!(ifd, target, tmp)

# transposition must happen here since the on-disk variant does this on access
if ndims(tmp) == 2
target .= tmp'
end
end

reinterpret(Gray{mappedtype}, data)
end


83 changes: 83 additions & 0 deletions src/mmap.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
ReadonlyTiffDiskArray(mappedtype, rawtype, ifds, dims) -> ReadonlyTiffDiskArray

A lazy representation of an OMETIFF file. This custom type is needed since TIFF
files are laid out noncontiguously and nonregularly. It uses an internal index
to determine the mapping from indices to the locations of data slices on disk.
These slices are generally XY slices and are usually loaded in all at once so it
is quickly loaded into an internal cache to speed up the process. Externally,
this type should behave very similarly to an in-memory array, albeit with a
higher cost of accessing an element.

$(FIELDS)
"""
mutable struct ReadonlyTiffDiskArray{T <: Gray, R, N1, N2} <: AbstractArray{T, N2}
    """
    A map of dimensions (sans XY) to the corresponding [`IFD`](@ref)
    """
    ifds::OrderedDict{NTuple{N1, Int}, IFD}

    """
    The full set of dimensions of the TIFF file, including XY
    """
    dims::NTuple{N2, Int}

    """
    An internal cache to fill when reading from disk
    """
    cache::Array{R, 2}

    """
    The dimension indices corresponding to the slice currently in the cache
    """
    cache_index::NTuple{N1, Int}

    # Inner constructor.
    #   T    - element type exposed to the user (must be a `Gray` subtype)
    #   R    - raw element type stored in the cache
    #   ifds - map from the N1 non-XY indices to the IFD holding that slice
    #   dims - full array size; its first two entries size the cache
    # Throws an `ErrorException` when `dims` does not have exactly two more
    # dimensions than the keys of `ifds`.
    function ReadonlyTiffDiskArray(::Type{T}, ::Type{R}, ifds::OrderedDict{NTuple{N1, Int}, IFD}, dims::NTuple{N2, Int}) where {T, R, N1, N2}
        if N2 - 2 != N1
            error("$N2 dimensions given, but the IFDs are indexed on $N1 dimensions instead of "*
                  "expected $(N2-2).")
        end

        cache = Array{R}(undef, dims[1], dims[2])

        # Sentinel meaning "nothing cached yet": -1 is never a valid 1-based
        # index. It is built with N1 entries so that it matches the
        # `NTuple{N1, Int}` field for any number of non-XY dimensions, rather
        # than only when N1 == 4.
        nothing_cached = ntuple(_ -> -1, Val(N1))

        return new{T, R, N1, N2}(ifds, dims, cache, nothing_cached)
    end
end

# The array reports the full TIFF dimensions (XY included) stored in `dims`.
function Base.size(A::ReadonlyTiffDiskArray)
    return A.dims
end

# Scalar indexing into the disk-backed array.
#   i1, i2 - indices into the first two (XY) dimensions
#   i      - the remaining N1 indices, used as the key into `A.ifds`
# Returns the requested element wrapped as `Gray{T}`. When the slice keyed by
# `i` is not the one currently held in `A.cache`, it is read from disk first
# and `A.cache_index` is updated. A key that is missing from `A.ifds` makes the
# lookup throw.
function Base.getindex(A::ReadonlyTiffDiskArray{Gray{T}, R, N1, N2}, i1::Int, i2::Int, i::Vararg{Int, N1}) where {T, R, N1, N2}
    # check the loaded cache is already the correct slice
    if A.cache_index == i
        return Gray(reinterpret(T, A.cache[i2, i1]))
    end

    ifd = A.ifds[i]

    # if the file isn't open, lets open a handle and update it
    if !isopen(ifd.file.io)
        path = ifd.file.filepath
        ifd.file.io = Stream(format"OMETIFF", open(path), path)
    end

    n_strips = length(ifd.strip_offsets)

    # if the data is striped then we need a 1-d buffer that holds exactly one
    # strip so that we can read into it. This should be replaced with a view of
    # cache in Julia >1.4, see https://github.com/JuliaLang/julia/pull/33046
    #
    # The previous version tested `size(tmp)` here before `tmp` had ever been
    # assigned, which raised an `UndefVarError` for every striped IFD.
    if n_strips > 1
        strip_len = length(A.cache) ÷ n_strips
        tmp = Array{R}(undef, strip_len)
    else
        # single read: fill the cache directly
        tmp = A.cache
    end

    _read_ifd_data!(ifd, A.cache, tmp)

    A.cache_index = i

    # NOTE(review): the cache is indexed as [i2, i1] on both paths, while the
    # striped `_read_ifd_data!` method fills the cache one row per strip --
    # confirm the orientation is right for striped and non-square images.
    return Gray(reinterpret(T, A.cache[i2, i1]))
end

# Writing is not supported: any attempt to assign into the disk-backed array
# raises an `ErrorException` that points the user at `copy`.
function Base.setindex!(A::ReadonlyTiffDiskArray{Gray{T}, R, N1, N2}, X, I...) where {T, R, N1, N2}
    msg = string(
        "This array is on disk and is read only. Convert to a mutable in-memory version by running ",
        "`copy(arr)`. \n\n𝗡𝗼𝘁𝗲: For large files this can be quite expensive. A future PR will add ",
        "support for reading and writing to/from disk.",
    )
    return error(msg)
end
11 changes: 11 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,17 @@ end
@test size(img) == (24, 18, 1, 1, 5, 1)
end
end
@testset "Memory mapping" begin
    # Load the same file once lazily (on disk) and once eagerly (in memory)
    # through a single stream, then compare the two results.
    tiffpath = joinpath("testdata", "singles", "181003_multi_pos_time_course_1_MMStack.ome.tif")
    open(tiffpath) do fh
        stream = Stream(format"OMETIFF", fh, OMETIFF.extract_filename(fh))
        ondisk = OMETIFF.load(stream, inmemory=false)
        inmem = OMETIFF.load(stream)
        @test size(ondisk) == (256, 256, 10, 2)
        @test all(ondisk[1:10,1,1,1] .== inmem[1:10,1,1,1])
        # the on-disk array is read only, so modifying it must throw
        @test_throws ErrorException ondisk[1:10,1,1,1] .= 1.0
    end
end
end

@testset "Error checks" begin
Expand Down