Skip to content

Commit

Permalink
Merge branch 'main' into bug_fix_#167
Browse files Browse the repository at this point in the history
  • Loading branch information
nvictus authored Apr 6, 2024
2 parents 0a10093 + fe33a9d commit a214350
Show file tree
Hide file tree
Showing 21 changed files with 696 additions and 245 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ name: CI
on:
push:
branches: [ main ]
tags:
- "v*" # Tag events matching v*, i.e. v1.0, v20.15.10

pull_request:
branches: [ main ]

Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
name: Publish Python Package to PyPI

on:
workflow_call:
release:
types: [published]
workflow_dispatch:

jobs:
Expand Down
12 changes: 11 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
# Release notes

## [Upcoming release](https://github.com/open2c/bioframe/compare/v0.6.2...HEAD)
## [Upcoming release](https://github.com/open2c/bioframe/compare/v0.6.3...HEAD)

## [v0.6.3](https://github.com/open2c/bioframe/compare/v0.6.2...v0.6.3)
Date 2024-03-11

Fixes:
* Prevent dropout from `closest` in some cases of left intervals with no neighbors by @agalitsyna in https://github.com/open2c/bioframe/pull/185
* Fix overlap returning float indexes causing failing tests (numpy v1.22.4, pandas v1.5.2) by @agalitsyna in https://github.com/open2c/bioframe/pull/185

**Full Changelog**: https://github.com/open2c/bioframe/compare/v0.6.2...v0.6.3

## [v0.6.2](https://github.com/open2c/bioframe/compare/v0.6.1...v0.6.2)
Date 2024-02-08

Changes:
* cols and df_view_col passed to downstream functions by @smitkadvani in https://github.com/open2c/bioframe/pull/182
Expand Down
156 changes: 150 additions & 6 deletions bioframe/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,150 @@
from ._version import __version__
from .core import *
from .io import *
from .ops import *
from .extras import *
from .vis import *
# Prefer the standard-library importlib.metadata; if that import fails,
# take the same two names from the importlib_metadata package instead.
try:
from importlib.metadata import PackageNotFoundError, version
except ImportError:
from importlib_metadata import PackageNotFoundError, version

# Look up the version recorded for the "bioframe" distribution; use the
# placeholder "unknown" when no such distribution is found.
try:
__version__ = version("bioframe")
except PackageNotFoundError:
__version__ = "unknown"

__all__ = [
"arrops",
"from_any",
"from_dict",
"from_list",
"from_series",
"is_bedframe",
"is_cataloged",
"is_chrom_dtype",
"is_complete_ucsc_string",
"is_contained",
"is_covering",
"is_overlapping",
"is_sorted",
"is_tiling",
"is_viewframe",
"make_viewframe",
"parse_region",
"parse_region_string",
"sanitize_bedframe",
"to_ucsc_string",
"update_default_colnames",
"binnify",
"digest",
"frac_gc",
"frac_gene_coverage",
"frac_mapped",
"make_chromarms",
"pair_by_distance",
"seq_gc",
"SCHEMAS",
"UCSCClient",
"assemblies_available",
"assembly_info",
"fetch_centromeres",
"fetch_chromsizes",
"load_fasta",
"read_bam",
"read_bigbed",
"read_bigwig",
"read_chromsizes",
"read_pairix",
"read_tabix",
"read_table",
"to_bigbed",
"to_bigwig",
"assign_view",
"closest",
"cluster",
"complement",
"count_overlaps",
"coverage",
"expand",
"merge",
"overlap",
"select",
"select_indices",
"select_labels",
"select_mask",
"setdiff",
"sort_bedframe",
"subtract",
"trim",
"plot_intervals",
"to_ucsc_colorstring",
]

from .core import (
arrops,
from_any,
from_dict,
from_list,
from_series,
is_bedframe,
is_cataloged,
is_chrom_dtype,
is_complete_ucsc_string,
is_contained,
is_covering,
is_overlapping,
is_sorted,
is_tiling,
is_viewframe,
make_viewframe,
parse_region,
parse_region_string,
sanitize_bedframe,
to_ucsc_string,
update_default_colnames,
)
from .extras import (
binnify,
digest,
frac_gc,
frac_gene_coverage,
frac_mapped,
make_chromarms,
pair_by_distance,
seq_gc,
)
from .io import (
SCHEMAS,
UCSCClient,
assemblies_available,
assembly_info,
fetch_centromeres,
fetch_chromsizes,
load_fasta,
read_bam,
read_bigbed,
read_bigwig,
read_chromsizes,
read_pairix,
read_tabix,
read_table,
to_bigbed,
to_bigwig,
)
from .ops import (
assign_view,
closest,
cluster,
complement,
count_overlaps,
coverage,
expand,
merge,
overlap,
select,
select_indices,
select_labels,
select_mask,
setdiff,
sort_bedframe,
subtract,
trim,
)
from .vis import plot_intervals, to_ucsc_colorstring

del version, PackageNotFoundError
1 change: 0 additions & 1 deletion bioframe/_version.py

This file was deleted.

61 changes: 45 additions & 16 deletions bioframe/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,50 @@
from . import arrops

from . import specs
from .specs import *

from . import stringops
from .stringops import *

from . import checks
from .checks import *

from . import construction
from .construction import *
from .checks import (
is_bedframe,
is_cataloged,
is_contained,
is_covering,
is_overlapping,
is_sorted,
is_tiling,
is_viewframe,
)
from .construction import (
from_any,
from_dict,
from_list,
from_series,
make_viewframe,
sanitize_bedframe,
)
from .specs import is_chrom_dtype, update_default_colnames
from .stringops import (
is_complete_ucsc_string,
parse_region,
parse_region_string,
to_ucsc_string,
)

__all__ = [
"arrops",
*specs.__all__,
*stringops.__all__,
*checks.__all__,
*construction.__all__,
"is_bedframe",
"is_cataloged",
"is_contained",
"is_covering",
"is_overlapping",
"is_sorted",
"is_tiling",
"is_viewframe",
"from_any",
"from_dict",
"from_list",
"from_series",
"make_viewframe",
"sanitize_bedframe",
"is_chrom_dtype",
"update_default_colnames",
"is_complete_ucsc_string",
"parse_region",
"parse_region_string",
"to_ucsc_string",
]
20 changes: 20 additions & 0 deletions bioframe/core/arrops.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,23 @@ def _overlap_intervals_legacy(starts1, ends1, starts2, ends2, closed=False, sort

return overlap_ids

def _convert_points_to_len1_segments(starts, ends):
    """
    Widen zero-length intervals (points) into length-1 segments.

    Helper for overlap_intervals(): an interval whose end equals its start
    gets its end shifted up by one, while every other interval keeps its
    original end.

    Parameters
    ----------
    starts, ends : numpy.ndarray
        Interval start and end coordinates.

    Returns
    -------
    list
        ``[starts, pseudo_ends]``, where ``starts`` is the input object
        itself and ``pseudo_ends`` is a copy of ``ends`` with the entries
        belonging to points incremented by 1. ``ends`` is not modified.
    """
    # Points are the positions where the interval has zero length.
    is_point = ends == starts

    # Work on a copy so the caller's ``ends`` array is left untouched.
    pseudo_ends = ends.copy()
    pseudo_ends[is_point] += 1

    return [starts, pseudo_ends]

def overlap_intervals(starts1, ends1, starts2, ends2, closed=False, sort=False):
"""
Expand Down Expand Up @@ -296,8 +313,11 @@ def overlap_intervals(starts1, ends1, starts2, ends2, closed=False, sort=False):

starts1 = np.asarray(starts1)
ends1 = np.asarray(ends1)
starts1, ends1 = _convert_points_to_len1_segments(starts1, ends1)

starts2 = np.asarray(starts2)
ends2 = np.asarray(ends2)
starts2, ends2 = _convert_points_to_len1_segments(starts2, ends2)

# Concatenate intervals lists
n1 = len(starts1)
Expand Down
2 changes: 1 addition & 1 deletion bioframe/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ def from_any(regions, fill_null=False, name_col="name", cols=None):
raise ValueError(f"Unknown input format: {type(regions)}")

if fill_null:
out_df[sk1] = pd.to_numeric(out_df[sk1]).fillna(0)
try:
out_df[sk1].fillna(0, inplace=True)
ends = []
for i in range(len(out_df)):
if out_df[ek1].values[i] is None:
Expand Down
12 changes: 5 additions & 7 deletions bioframe/extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,19 +305,17 @@ def _each(chrom_group):
seq = str(seq[:])
gc = []
for _, bin in chrom_group.iterrows():
s = seq[bin.start : bin.end]
s = seq[bin["start"] : bin["end"]]
gc.append(seq_gc(s, mapped_only=mapped_only))
return gc

out = df.groupby("chrom", sort=False).apply(_each)
agg = df.groupby("chrom", sort=False)[["start", "end"]].apply(_each)
out_col = pd.Series(data=np.concatenate(agg.values), index=df.index).rename("GC")

if return_input:
return pd.concat(
[df, pd.Series(data=np.concatenate(out), index=df.index).rename("GC")],
axis="columns",
)
return pd.concat([df, out_col], axis="columns")
else:
return pd.Series(data=np.concatenate(out), index=df.index).rename("GC")
return out_col


def seq_gc(seq, mapped_only=True):
Expand Down
41 changes: 29 additions & 12 deletions bioframe/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,34 @@
from .assembly import assemblies_available, assembly_info
from .fileops import (
load_fasta,
read_bam,
read_bigbed,
read_bigwig,
read_chromsizes,
read_pairix,
read_tabix,
read_table,
to_bigbed,
to_bigwig,
)
from .resources import UCSCClient, fetch_centromeres, fetch_chromsizes
from .schemas import SCHEMAS

from . import fileops
from .fileops import *

from . import resources
from .resources import *

from . import assembly
from .assembly import *

__all__ = [
"assemblies_available",
"assembly_info",
"read_table",
"read_chromsizes",
"read_tabix",
"read_pairix",
"read_bam",
"load_fasta",
"read_bigwig",
"to_bigwig",
"read_bigbed",
"to_bigbed",
"UCSCClient",
"fetch_centromeres",
"fetch_chromsizes",
"SCHEMAS",
*fileops.__all__,
*resources.__all__,
*assembly.__all__,
]
Loading

0 comments on commit a214350

Please sign in to comment.