Fluctuation complexity, restrict possibilities to formally defined self-informations #413

Draft · wants to merge 17 commits into main
3 changes: 2 additions & 1 deletion docs/src/information_measures.md
@@ -15,6 +15,7 @@ information(::InformationMeasure, ::OutcomeSpace, ::Any)
information(::DifferentialInfoEstimator, ::Any)
information_maximum
information_normalized
self_information
```

## Entropies
@@ -36,7 +37,7 @@ ShannonExtropy
RenyiExtropy
TsallisExtropy
ElectronicEntropy
FluctuationComplexity
InformationFluctuation
```

## Discrete information estimators
6 changes: 6 additions & 0 deletions docs/src/probabilities.md
@@ -92,6 +92,12 @@ Diversity
SequentialPairDistances
```

### Sequential category transitions

```@docs
SequentialCategoryTransitions
```

### Bubble sort swaps

```@docs
39 changes: 39 additions & 0 deletions src/core/information_functions.jl
@@ -1,4 +1,5 @@
export information, information_maximum, information_normalized, convert_logunit
export self_information
export entropy

###########################################################################################
@@ -279,6 +280,44 @@ function information(::InformationMeasure, ::DifferentialInfoEstimator, args...)
))
end

"""
self_information(measure::InformationMeasure, p, N::Int)

Compute the "self-information"/"surprisal" of a single probability `p` under the given
information measure, assuming that `p` is part of a length-`N` probability distribution.
Review comment (Member):
Unfortunately I don't agree with the latest change of requiring N. It seems simpler, and more reasonable, to simply not allow Curado to be part of this interface. The opposite, defining the information unit as depending on N, doesn't make much sense at least not with how Shannon introduced it.

Reply (Member Author):
@Datseris Curado is not the only information measure whose surprisal/self-information depends explicitly on N, when following the definition of an information measure as a probability-weighted average of the surprisal (as I do in the paper).

> The opposite, defining the information unit as depending on N, doesn't make much sense at least not with how Shannon introduced it.

In the context of the Shannon information unit alone, I agree. But the point of this interface is to generalize the Shannon information unit. This inevitably introduces N as a parameter.

Can we discuss the final API when I'm done with writing up the paper? I'm not too far from finishing it; I just need to generate a few example applications. Since I am using this PR for the paper analyses, it would be nice to not change anything in this draft PR until the paper is ready.
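
To make the point about N concrete, here is a sketch using Curado as formulated in this PR (splitting the constant term evenly over the N outcomes is one possible choice, not the only one):

```math
F_C = \sum_{i=1}^N \left(1 - e^{-b p_i}\right) + e^{-b} - 1
    = \sum_{i=1}^N p_i \left[ \frac{1 - e^{-b p_i}}{p_i} + \frac{e^{-b} - 1}{N p_i} \right],
```

so the bracketed term plays the role of the self-information ``I_C(p_i)``, and the even split of the constant ``e^{-b} - 1`` over the outcomes is what makes it depend explicitly on ``N``.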

Reply (Member Author):
The alternative is to have information_content/information_unit, which dispatches to a subset of InformationMeasures, and then generalized_information_content/generalized_information_unit, which dispatches to those InformationMeasures whose generalization of the information unit depends on N. But that kind of defeats the purpose of having an interface to begin with, since we're back at defining multiple functions with different names for things that are fundamentally identical (modulo the parameter N).

Reply (Member Author):
PS: I sent you a link to the paper draft, @Datseris


For some `measure`s, the information content is independent of `N`, while for others
it depends on `N`. For consistency, we require that you always provide `N` (`N` will be
ignored if not relevant).

This function requires `p > 0`; giving `0` can yield `Inf` or `NaN` for certain
`measure`s.

## Definition

We use the term "self-information" loosely here, and
define it as the functional ``I_M(p_i)`` that satisfies ``\\sum_i p_i I_M(p_i) = I_M``,
where ``I_M`` (without an argument) is the value of the given information measure for the distribution.

If `measure` is [`Shannon`](@ref), then this is the
[Shannon self-information](https://en.wikipedia.org/wiki/Information_content), which
fulfils a set of axioms. If `measure` is some other information measure, then it is not guaranteed
that these axioms are fulfilled. We *only* guarantee that the probability-weighted
sum of the self-information equals the information measure.

!!! note "Motivation for this definition"
This definition is motivated by the desire to compute generalized
[`InformationFluctuation`](@ref), which is a measure of fluctuations of local self-information
relative to some information-theoretic summary statistic of a distribution. As far as we know,
these self-information functions have not been treated in the literature before, and they
will be part of an upcoming paper we're writing!
"""
function self_information(measure::InformationMeasure, pᵢ, N = nothing)
throw(ArgumentError(
"""`InformationMeasure` $(typeof(measure)) does not implement `self_information`."""
))
end
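
To illustrate the contract stated above, here is a minimal sketch (it assumes the `Shannon` method for `self_information` added later in this diff, and a `Probabilities` vector as input):

```julia
using ComplexityMeasures

p = Probabilities([0.5, 0.25, 0.25])
measure = Shannon(base = 2)

# The probability-weighted sum of self-informations should recover the measure itself.
h_weighted = sum(pᵢ * self_information(measure, pᵢ, length(p)) for pᵢ in p)
h_weighted ≈ information(measure, p)  # both are 1.5 bits
```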


###########################################################################################
# Utils
2 changes: 1 addition & 1 deletion src/core/information_measures.jl
@@ -37,7 +37,7 @@ for usage examples.
- [`RenyiExtropy`](@ref).
- [`TsallisExtropy`](@ref).
- [`ShannonExtropy`](@ref), which is a subcase of the above two in the limit `q → 1`.
- [`FluctuationComplexity`](@ref).
- [`InformationFluctuation`](@ref).

## Estimators

3 changes: 2 additions & 1 deletion src/encoding_implementations/encoding_implementations.jl
@@ -7,4 +7,5 @@ include("relative_first_difference_encoding.jl")
include("unique_elements_encoding.jl")
include("bubble_sort_swaps_encoding.jl")
include("combination_encoding.jl")
include("distance_pair_encoding.jl")
include("distance_pair_encoding.jl")
include("sequential_categorical.jl")
88 changes: 88 additions & 0 deletions src/encoding_implementations/sequential_categorical.jl
@@ -0,0 +1,88 @@
using Combinatorics
export SequentialCategoricalEncoding

"""
SequentialCategoricalEncoding <: Encoding
SequentialCategoricalEncoding(; symbols, m = 2)

An encoding scheme that [`encode`](@ref)s length-`m` categorical vectors onto integers.

## Description

Given a vector of possible `symbols`, `SequentialCategoricalEncoding` constructs all possible
length-`m` sequential symbol transitions (ordered selections of `m` distinct symbols) and assigns each an integer.

The input vector `χ` is always treated as categorical, and can have any element type
(but encoding/decoding is faster if `χ` is sortable).

## Example

```julia
julia> encoding = SequentialCategoricalEncoding(symbols = ["hello", "there", "skipper"], m = 2)
SequentialCategoricalEncoding, with 3 fields:
symbols = ["hello", "there", "skipper"]
encode_dict = Dict(["there", "skipper"] => 4, ["skipper", "hello"] => 5, ["there", "hello"] => 3, ["hello", "skipper"] => 2, ["skipper", "there"] => 6, ["hello", "there"] => 1)
decode_dict = Dict(5 => ["skipper", "hello"], 4 => ["there", "skipper"], 6 => ["skipper", "there"], 2 => ["hello", "skipper"], 3 => ["there", "hello"], 1 => ["hello", "there"])
```

We can now use `encoding` to encode and decode transitions:

```julia
julia> decode(encoding, 1)
2-element Vector{String}:
"hello"
"there"

julia> encode(encoding, ["hello", "there"])
1

julia> encode(encoding, ["there", "skipper"])
4

julia> decode(encoding, 4)
2-element Vector{String}:
"there"
"skipper"
```


"""
struct SequentialCategoricalEncoding{M, V, ED, DD} <: Encoding
symbols::V
encode_dict::ED
decode_dict::DD

function SequentialCategoricalEncoding(; symbols, m = 2)
s = unique(symbols) # we don't sort, because that would disallow mixing types
pgen = permutations(s, m)
T = eltype(s)
perms = [SVector{m, T}(p) for p in pgen]

encode_dict = Dict{eltype(perms), Int}()
decode_dict = Dict{Int, eltype(perms)}()
for (i, pᵢ) in enumerate(perms)
encode_dict[pᵢ] = i
decode_dict[i] = pᵢ
end
S, TED, TDD = typeof(s), typeof(encode_dict), typeof(decode_dict)
return new{m, S, TED, TDD}(s, encode_dict, decode_dict)
end
end


# Note: internally, we represent the transitions with `StaticVector`s. However,
# `χ` will in general not be a static vector if the user uses `encode` directly.
# Therefore, we convert to `StaticVector`. This doesn't allocate, so no need to
# worry about performance.
function encode(encoding::SequentialCategoricalEncoding{m}, χ::AbstractVector) where {m}
if m != length(χ)
throw(ArgumentError("Transition length `m` and length of input must match! Got `m = $m` and `length(χ) = $(length(χ))`"))
end
χstatic = SVector{m, eltype(χ)}(χ)
return encoding.encode_dict[χstatic]
end

function decode(encoding::SequentialCategoricalEncoding{m}, i) where {m}
return encoding.decode_dict[i]
end
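
As a usage sketch beyond `encode`/`decode` of single transitions, the encoding could be slid over a categorical time series (the symbol vector and window loop below are illustrative, not part of this PR):

```julia
using ComplexityMeasures

encoding = SequentialCategoricalEncoding(symbols = ["rain", "sun", "clouds"], m = 2)

# Illustrative categorical series. Consecutive symbols must differ, because the
# encoding enumerates permutations, i.e. transitions between distinct symbols.
x = ["rain", "sun", "clouds", "rain", "clouds"]

# Encode every length-2 transition to an integer, then decode back.
codes = [encode(encoding, x[i:i+1]) for i in 1:length(x)-1]
transitions = [decode(encoding, c) for c in codes]
```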
5 changes: 5 additions & 0 deletions src/information_measure_definitions/curado.jl
@@ -34,3 +34,8 @@ function information_maximum(e::Curado, L::Int)
# Maximized for the uniform distribution, which for distribution of length L is
return L * (1 - exp(-b/L)) + exp(-b) - 1
end

function self_information(e::Curado, p, N::Int)
b = e.b
# Satisfies ∑ᵢ pᵢ I(pᵢ) = ∑ᵢ (1 - exp(-b*pᵢ)) + exp(-b) - 1 (the Curado entropy),
# with the constant term exp(-b) - 1 split evenly over the N outcomes.
return (1 - exp(-b*p))/p + (exp(-b) - 1)/(N*p)
end
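
A quick consistency sketch for the formulation above (it assumes the package's Curado entropy is ∑ᵢ (1 - e^{-b pᵢ}) + e^{-b} - 1, as suggested by `information_maximum` in this file):

```julia
using ComplexityMeasures

# The probability-weighted self-informations should sum to the Curado entropy,
# assuming information(::Curado, ::Probabilities) computes ∑ᵢ (1 - exp(-b*pᵢ)) + exp(-b) - 1.
p = Probabilities([0.5, 0.3, 0.2])
e = Curado(b = 1.0)
sum(pᵢ * self_information(e, pᵢ, length(p)) for pᵢ in p) ≈ information(e, p)
```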
70 changes: 48 additions & 22 deletions src/information_measure_definitions/fluctuation_complexity.jl
@@ -1,58 +1,84 @@
export FluctuationComplexity
export InformationFluctuation

"""
FluctuationComplexity <: InformationMeasure
FluctuationComplexity(; definition = Shannon(; base = 2), base = 2)
InformationFluctuation <: InformationMeasure
InformationFluctuation(; definition = Shannon())

The "fluctuation complexity" quantifies the standard deviation of the information content of the states
The information fluctuation quantifies the standard deviation of the information content of the states
``\\omega_i`` around some summary statistic ([`InformationMeasure`](@ref)) of a PMF. Specifically, given some
outcome space ``\\Omega`` with outcomes ``\\omega_i \\in \\Omega``
and a probability mass function ``p(\\Omega) = \\{ p(\\omega_i) \\}_{i=1}^N``, it is defined as

```math
\\sigma_I(p) := \\sqrt{\\sum_{i=1}^N p_i(I_i - H_*)^2}
\\sigma_{I_Q}(p) := \\sqrt{\\sum_{i=1}^N p_i(I_Q(p_i) - F_Q)^2}
```

where ``I_i = -\\log_{base}(p_i)`` is the information content of the i-th outcome. The type of information measure
``*`` is controlled by `definition`.
where ``I_Q(p_i)`` is the [`self_information`](@ref) of the i-th outcome, and ``F_Q`` is the value of the
information measure for the distribution (both controlled by `definition`).

The `base` controls the base of the logarithm that goes into the information content terms. Make sure that
you pick a `base` that is consistent with the base chosen for the `definition` (relevant for e.g. [`Shannon`](@ref)).
## Compatible with

- [`Shannon`](@ref)
- [`Tsallis`](@ref)
- [`Curado`](@ref)
- [`StretchedExponential`](@ref)
- [`ShannonExtropy`](@ref)

If `definition` is the [`Shannon`](@ref) entropy, then we recover the
[Shannon-type "information fluctuation complexity"](https://en.wikipedia.org/wiki/Information_fluctuation_complexity)
from [Bates1993](@cite).

## Properties

If `definition` is the [`Shannon`](@ref) entropy, then we recover
the [Shannon-type information fluctuation complexity](https://en.wikipedia.org/wiki/Information_fluctuation_complexity)
from [Bates1993](@cite). Then the fluctuation complexity is zero for PMFs with only a single non-zero element, or
Then the information fluctuation is zero for PMFs with only a single non-zero element, or
for the uniform distribution.

If `definition` is not Shannon entropy, then the properties of the measure vary and do not necessarily
match the properties described in [Bates1993](@cite).
## Examples

```julia
using ComplexityMeasures
using Random; rng = Xoshiro(55543)

# Information fluctuation for a time series encoded by ordinal patterns
x = rand(rng, 10000)
def = Tsallis(q = 2) # information measure definition
pest = RelativeAmount() # probabilities estimator
o = OrdinalPatterns(m = 3) # outcome space / discretization method
information(InformationFluctuation(definition = def), pest, o, x)
```

!!! note "Potential for new research"
As far as we know, using other information measures besides Shannon entropy for the
fluctuation complexity hasn't been explored in the literature yet. Our implementation, however, allows for it.
Please inform us if you try some new combinations!
We're currently writing a paper outlining the generalizations to other measures. For now, we verify
correctness of the measure through numerical examples in our test-suite.
"""
struct FluctuationComplexity{M <: InformationMeasure, I <: Integer} <: InformationMeasure
struct InformationFluctuation{M <: InformationMeasure, I <: Integer} <: InformationMeasure
definition::M
base::I

function FluctuationComplexity(; definition::D = Shannon(base = 2), base::I = 2) where {D, I}
if D isa FluctuationComplexity
throw(ArgumentError("Cannot use `FluctuationComplexity` as the summary statistic for `FluctuationComplexity`. Please select some other information measures, like `Shannon`."))
function InformationFluctuation(; definition::D = Shannon(base = 2), base::I = 2) where {D, I}
if definition isa InformationFluctuation
throw(ArgumentError("Cannot use `InformationFluctuation` as the summary statistic for `InformationFluctuation`. Please select some other information measures, like `Shannon`."))
end
return new{D, I}(definition, base)
end
end

# The information fluctuation is zero when p_i = 1/N or when p = (1, 0, 0, ...).
function information(e::FluctuationComplexity, probs::Probabilities)
function information(e::InformationFluctuation, probs::Probabilities)
def = e.definition
non0_probs = Iterators.filter(!iszero, vec(probs))
h = information(def, probs)
return sqrt(sum(pᵢ * (self_information(def, pᵢ, length(probs)) - h)^2 for pᵢ in non0_probs))
end

function information_normalized(e::InformationFluctuation, probs::Probabilities)
def = e.definition
non0_probs = Iterators.filter(!iszero, vec(probs))
h = information(def, probs)
info_fluct = sqrt(sum(pᵢ * (self_information(def, pᵢ, length(probs)) - h)^2 for pᵢ in non0_probs))
return info_fluct / h
end

# The maximum is not generally known.
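
A small sanity check of the defining formula, as a sketch (the numbers assume `Shannon(base = 2)` and the `self_information` contract above):

```julia
using ComplexityMeasures

p = Probabilities([0.5, 0.25, 0.25])
h = information(Shannon(base = 2), p)  # 1.5 bits

# Manual evaluation of σ_I(p) = sqrt(∑ᵢ pᵢ (I(pᵢ) - H)²) with I(pᵢ) = -log2(pᵢ).
σ_manual = sqrt(sum(pᵢ * (-log2(pᵢ) - h)^2 for pᵢ in p))  # 0.5

σ_manual ≈ information(InformationFluctuation(definition = Shannon(base = 2)), p)
```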
4 changes: 4 additions & 0 deletions src/information_measure_definitions/identification.jl
@@ -27,3 +27,7 @@ end
function information_maximum(e::Identification, L::Int)
return 2 * (1 - 1 / L)
end

function self_information(e::Identification, pᵢ, N = nothing)
return 2 * (1 - pᵢ)
end
5 changes: 5 additions & 0 deletions src/information_measure_definitions/kaniadakis.jl
@@ -40,3 +40,8 @@ end
function information_maximum(e::Kaniadakis, L::Int)
throw(ErrorException("information_maximum not implemeted for Kaniadakis entropy yet"))
end

function self_information(e::Kaniadakis, pᵢ, N = nothing)
κ = e.κ
return (pᵢ^(-κ) - pᵢ^κ) / (2κ)
end
4 changes: 4 additions & 0 deletions src/information_measure_definitions/shannon.jl
@@ -24,4 +24,8 @@ function information(e::Shannon, probs::Probabilities)
return -sum(x*logf(x) for x in non0_probs)
end

function self_information(e::Shannon, pᵢ, N)
return -log(e.base, pᵢ)
end

information_maximum(e::Shannon, L::Int) = log_with_base(e.base)(L)
4 changes: 4 additions & 0 deletions src/information_measure_definitions/shannon_extropy.jl
@@ -33,3 +33,7 @@ function information_maximum(e::ShannonExtropy, L::Int)

return (L - 1) * log(e.base, L / (L - 1))
end

function self_information(e::ShannonExtropy, pᵢ, N = nothing)
return -log(e.base, 1 - pᵢ)
end
9 changes: 8 additions & 1 deletion src/information_measure_definitions/streched_exponential.jl
@@ -39,7 +39,6 @@ function stretched_exponential(pᵢ, η, base)
# integral used in Anteneodo & Plastino (1999). See
# https://specialfunctions.juliamath.org/stable/functions_list/#SpecialFunctions.gamma_inc
Γx = gamma(x)

return gamma_inc(x, -log(base, pᵢ))[2] * Γx - pᵢ * Γx
end

@@ -56,3 +55,11 @@ function information_maximum(e::StretchedExponential, L::Int)
# entry in the tuple returned from `gamma_inc`.
L * gamma_inc(x, log(e.base, L))[2] * Γx - Γx
end

function self_information(e::StretchedExponential, pᵢ, N)
η, base = e.η, e.base
Γ₁ = gamma((η + 1) / η, -log(base, pᵢ))
Γ₂ = gamma((η + 1) / η)
# NB! Filter for pᵢ != 0 before calling this method.
return Γ₁/pᵢ - Γ₂
end
4 changes: 4 additions & 0 deletions src/information_measure_definitions/tsallis.jl
@@ -50,3 +50,7 @@ function information_maximum(e::Tsallis, L::Int)
return k*(L^(1 - q) - 1) / (1 - q)
end
end

function self_information(e::Tsallis, pᵢ, N = nothing)
return (1 - pᵢ^(e.q - 1)) / (e.q - 1)
end
5 changes: 5 additions & 0 deletions src/information_measure_definitions/tsallis_extropy.jl
@@ -57,3 +57,8 @@ function information_maximum(e::TsallisExtropy, L::Int)

return ((L - 1) * L^(q - 1) - (L - 1)^q) / ((q - 1) * L^(q - 1))
end

function self_information(e::TsallisExtropy, pᵢ, N) # must have N
Review comment (Member):

What is N here? This is not reflected in the function definition or call signature.

If N is either the length of probs, or the total number of outcomes, then this quantity does not satisfy the definition of an information content, as it isn't an exclusive function of a single real number. You can explore options in the paper, but here I'd say we don't keep it in the software until it is more solid.

Reply (Member Author):

I'll see if I can define a functional that doesn't depend on N; if not, we keep it out.

k, q = e.k, e.q
return (N - 1)/(q - 1) - (1 - pᵢ)^q / (q-1)
end