TidesDB Python API Reference
Getting Started
Prerequisites
You must have the TidesDB shared C library installed on your system. Installation instructions are available in the TidesDB C library documentation.
Installation
Or install from source:
git clone https://github.com/tidesdb/tidesdb-python.git
cd tidesdb-python
pip install -e .

Quick Start
import tidesdb
db = tidesdb.TidesDB.open("./mydb")
config = tidesdb.default_column_family_config()
config.compression_algorithm = tidesdb.CompressionAlgorithm.LZ4_COMPRESSION
db.create_column_family("users", config)
cf = db.get_column_family("users")
with db.begin_txn() as txn:
    txn.put(cf, b"user:1", b"Alice")
    txn.put(cf, b"user:2", b"Bob")
    txn.commit()
with db.begin_txn() as txn:
    value = txn.get(cf, b"user:1")
    print(f"user:1 = {value.decode()}")
with db.begin_txn() as txn:
    with txn.new_iterator(cf) as it:
        it.seek_to_first()
        for key, value in it:
            print(f"{key.decode()} = {value.decode()}")
db.drop_column_family("users")
db.close()

Usage
Opening a Database
import tidesdb
db = tidesdb.TidesDB.open("./mydb")
# With configuration
config = tidesdb.Config(
    db_path="./mydb",
    num_flush_threads=2,
    num_compaction_threads=2,
    log_level=tidesdb.LogLevel.LOG_INFO,
    block_cache_size=64 * 1024 * 1024,  # 64MB
    max_open_sstables=256,
    log_to_file=False,  # When True, write logs to a file instead of stderr
    log_truncation_at=24 * 1024 * 1024,  # Log file truncation size (24MB)
)
db = tidesdb.TidesDB(config)
# Using context manager
with tidesdb.TidesDB.open("./mydb") as db:
    # ... use database
    pass  # automatically closed

Column Families
db.create_column_family("my_cf")
# Create with custom config
config = tidesdb.default_column_family_config()
config.write_buffer_size = 128 * 1024 * 1024  # 128MB
config.compression_algorithm = tidesdb.CompressionAlgorithm.ZSTD_COMPRESSION
config.enable_bloom_filter = True
config.bloom_fpr = 0.01
config.sync_mode = tidesdb.SyncMode.SYNC_INTERVAL
config.sync_interval_us = 128000  # 128ms
config.klog_value_threshold = 512  # Values > 512 bytes go to vlog
config.min_disk_space = 100 * 1024 * 1024  # 100MB minimum disk space
config.default_isolation_level = tidesdb.IsolationLevel.READ_COMMITTED
config.l1_file_count_trigger = 4  # L1 compaction trigger
config.l0_queue_stall_threshold = 20  # L0 backpressure threshold
db.create_column_family("my_cf", config)
cf = db.get_column_family("my_cf")
names = db.list_column_families()
print(names)
stats = cf.get_stats()
print(f"Levels: {stats.num_levels}, Memtable: {stats.memtable_size} bytes")
print(f"Total keys: {stats.total_keys}, Total data size: {stats.total_data_size} bytes")
print(f"Avg key size: {stats.avg_key_size:.2f}, Avg value size: {stats.avg_value_size:.2f}")
print(f"Read amplification: {stats.read_amp:.2f}, Hit rate: {stats.hit_rate:.2%}")
print(f"Keys per level: {stats.level_key_counts}")
# B+tree klog stats (only populated if use_btree=True)
if stats.use_btree:
    print(f"B+tree total nodes: {stats.btree_total_nodes}")
    print(f"B+tree max height: {stats.btree_max_height}")
    print(f"B+tree avg height: {stats.btree_avg_height:.2f}")
db.rename_column_family("my_cf", "new_cf")
db.drop_column_family("new_cf")

Cloning a Column Family
Create a complete copy of an existing column family with a new name. The clone contains all the data from the source at the time of cloning and is completely independent.
# Clone an existing column family
db.clone_column_family("source_cf", "cloned_cf")
# Both column families now exist independently
source = db.get_column_family("source_cf")
clone = db.get_column_family("cloned_cf")
# Modifications to one do not affect the other
with db.begin_txn() as txn:
    txn.put(source, b"key", b"new_value")
    txn.commit()
with db.begin_txn() as txn:
    # clone still has the original value
    value = txn.get(clone, b"key")

Use cases
- Testing · Create a copy of production data for testing without affecting the original
- Branching · Create a snapshot of data before making experimental changes
- Migration · Clone data before schema or configuration changes
- Backup verification · Clone and verify data integrity without modifying the source
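For example, the testing workflow might look like the following minimal sketch (the column family and key names are illustrative):

# Clone production data, run a destructive test against the clone,
# then drop the clone -- the original column family is untouched
db.clone_column_family("users", "users_test")
test_cf = db.get_column_family("users_test")

with db.begin_txn() as txn:
    txn.delete(test_cf, b"user:1")  # destructive change, affects the clone only
    txn.commit()

db.drop_column_family("users_test")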
Transactions
txn = db.begin_txn()
try:
    # Put key-value pairs (TTL -1 means no expiration)
    txn.put(cf, b"key1", b"value1", ttl=-1)
    txn.put(cf, b"key2", b"value2", ttl=-1)

    value = txn.get(cf, b"key1")

    txn.delete(cf, b"key2")

    txn.commit()
except tidesdb.TidesDBError as e:
    txn.rollback()
    raise
finally:
    txn.close()
# Using context manager (auto-rollback on exception, auto-close)
with db.begin_txn() as txn:
    txn.put(cf, b"key", b"value")
    txn.commit()

Transaction Reset
reset() resets a committed or aborted transaction for reuse with a new isolation level. This avoids the overhead of freeing and reallocating transaction resources in hot loops.
txn = db.begin_txn()
# First batch of work
txn.put(cf, b"key1", b"value1")
txn.commit()
# Reset instead of close + begin_txn
txn.reset(tidesdb.IsolationLevel.READ_COMMITTED)
# Second batch of work using the same transaction
txn.put(cf, b"key2", b"value2")
txn.commit()
# Free once when done
txn.close()

Batch processing example
txn = db.begin_txn()
for batch in batches:
    for key, value in batch:
        txn.put(cf, key, value)
    txn.commit()
    txn.reset(tidesdb.IsolationLevel.READ_COMMITTED)
txn.close()

Behavior
- The transaction must be committed or rolled back before reset; resetting an active transaction raises TidesDBError
- Internal buffers are retained to avoid reallocation
- A fresh transaction ID and snapshot sequence are assigned
- The isolation level can be changed on each reset
When to use
- Batch processing · Reuse a single transaction across many commit cycles in a loop
- Connection pooling · Reset a transaction for a new request without reallocation
- High-throughput ingestion · Reduce allocation overhead in tight write loops
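For example, a single transaction object can be reused across commits with a different isolation level on each reset. A minimal sketch (keys and values are illustrative; remember that calling reset() on a still-active transaction raises TidesDBError):

txn = db.begin_txn()

# Ingest with the default isolation level
txn.put(cf, b"event:1", b"ingest")
txn.commit()

# Reuse the same transaction object for a stricter read-modify-write pass
txn.reset(tidesdb.IsolationLevel.SERIALIZABLE)
current = txn.get(cf, b"event:1")
txn.put(cf, b"event:1", current + b"-processed")
txn.commit()

txn.close()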
TTL (Time-To-Live)
import time
# Set TTL as Unix timestamp (seconds since epoch)
ttl = int(time.time()) + 60  # Expires in 60 seconds
with db.begin_txn() as txn:
    txn.put(cf, b"temp_key", b"temp_value", ttl=ttl)
    txn.commit()

Isolation Levels
txn = db.begin_txn_with_isolation(tidesdb.IsolationLevel.SERIALIZABLE)
# Available levels
# -- READ_UNCOMMITTED -- Sees all data including uncommitted changes
# -- READ_COMMITTED -- Sees only committed data (default)
# -- REPEATABLE_READ -- Consistent snapshot, phantom reads possible
# -- SNAPSHOT -- Write-write conflict detection
# -- SERIALIZABLE -- Full read-write conflict detection (SSI)
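Under SNAPSHOT and SERIALIZABLE, a commit that conflicts with a concurrent writer is rejected, so the usual pattern is to catch the error and retry. A minimal sketch, assuming conflicts surface as tidesdb.TidesDBError (a real implementation would inspect e.code to distinguish conflicts from other errors):

import time
import tidesdb

def increment_counter(db, cf, key, retries=5):
    """Read-modify-write under SNAPSHOT isolation, retrying on conflict."""
    for attempt in range(retries):
        txn = db.begin_txn_with_isolation(tidesdb.IsolationLevel.SNAPSHOT)
        try:
            count = int(txn.get(cf, key)) + 1
            txn.put(cf, key, str(count).encode())
            txn.commit()
            return count
        except tidesdb.TidesDBError:
            txn.rollback()
            time.sleep(0.01 * (attempt + 1))  # simple backoff before retrying
        finally:
            txn.close()
    raise RuntimeError("could not commit after retries")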
Savepoints

with db.begin_txn() as txn:
    txn.put(cf, b"key1", b"value1")

    txn.savepoint("sp1")

    txn.put(cf, b"key2", b"value2")

    # Rollback to savepoint (key2 discarded, key1 remains)
    txn.rollback_to_savepoint("sp1")

    # Or release savepoint without rollback
    # txn.release_savepoint("sp1")

    txn.commit()  # Only key1 is written
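Savepoints are handy for skipping bad records inside a larger batch without abandoning the whole transaction. A minimal sketch; the records and the validation rule are illustrative:

records = [(b"user:1", b"Alice"), (b"user:2", b""), (b"user:3", b"Carol")]

with db.begin_txn() as txn:
    for i, (key, value) in enumerate(records):
        name = f"rec{i}"
        txn.savepoint(name)
        txn.put(cf, key, value)
        if not value:  # illustrative validation rule: reject empty values
            txn.rollback_to_savepoint(name)  # discard just this record
        else:
            txn.release_savepoint(name)
    txn.commit()  # user:1 and user:3 are written; user:2 is skipped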
Iterators

with db.begin_txn() as txn:
    with txn.new_iterator(cf) as it:
        # Forward iteration
        it.seek_to_first()
        while it.valid():
            key = it.key()
            value = it.value()
            print(f"{key} = {value}")
            it.next()

        # Backward iteration
        it.seek_to_last()
        while it.valid():
            print(f"{it.key()} = {it.value()}")
            it.prev()

        # Seek to specific key
        it.seek(b"user:")  # First key >= "user:"
        it.seek_for_prev(b"user:z")  # Last key <= "user:z"

        it.seek_to_first()
        for key, value in it:
            print(f"{key} = {value}")
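seek() plus a prefix check is the usual way to scan a key range; no dedicated prefix API is shown here, so the sketch below simply stops once keys leave the prefix (assuming the default memcmp key ordering):

prefix = b"user:"

with db.begin_txn() as txn:
    with txn.new_iterator(cf) as it:
        it.seek(prefix)  # jump to the first key >= prefix
        while it.valid() and it.key().startswith(prefix):
            print(f"{it.key().decode()} = {it.value().decode()}")
            it.next()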
Maintenance Operations

# Manual compaction (queues compaction)
cf.compact()
# Manual memtable flush (sorted run to L1)
cf.flush_memtable()
# Check if flush/compaction is in progress
if cf.is_flushing():
    print("Flush in progress")
if cf.is_compacting():
    print("Compaction in progress")
# Get cache statistics
cache_stats = db.get_cache_stats()
print(f"Cache hits: {cache_stats.hits}, misses: {cache_stats.misses}")
print(f"Hit rate: {cache_stats.hit_rate:.2%}")

Backup
# Create an on-disk snapshot without blocking reads/writes
db.backup("./mydb_backup")
# The backup directory must be non-existent or empty
# Backup can be opened as a normal database
with tidesdb.TidesDB.open("./mydb_backup") as backup_db:
    # ... read from backup
    pass

Checkpoint
Create a lightweight, near-instant snapshot of the database using hard links instead of copying SSTable data.
# Create a checkpoint (near-instant, uses hard links)
db.checkpoint("./mydb_checkpoint")
# The checkpoint directory must be non-existent or empty
# Checkpoint can be opened as a normal database
with tidesdb.TidesDB.open("./mydb_checkpoint") as checkpoint_db:
    # ... read from checkpoint
    pass

Checkpoint vs Backup
| | backup() | checkpoint() |
|---|---|---|
| Speed | Copies every SSTable byte-by-byte | Near-instant (hard links, O(1) per file) |
| Disk usage | Full independent copy | No extra disk until compaction removes old SSTables |
| Portability | Can be moved to another filesystem or machine | Same filesystem only (hard link requirement) |
| Use case | Archival, disaster recovery, remote shipping | Fast local snapshots, point-in-time reads, streaming backups |
Behavior
- Requires checkpoint_dir to be a non-existent directory or an empty directory
- For each column family: flushes the active memtable, halts compactions, hard links all SSTable files, copies small metadata files, then resumes compactions
- Falls back to file copy if hard linking fails (e.g., cross-filesystem)
- Database stays open and usable during checkpoint
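Because the target directory must not already contain data, a common pattern is to write each checkpoint under a fresh, timestamped path. A minimal sketch; the directory layout, column family, and key are illustrative:

import os
import time

# New, empty directory per checkpoint, e.g. ./checkpoints/1700000000
os.makedirs("./checkpoints", exist_ok=True)
checkpoint_dir = f"./checkpoints/{int(time.time())}"
db.checkpoint(checkpoint_dir)

# Point-in-time reads against the checkpoint
with tidesdb.TidesDB.open(checkpoint_dir) as snapshot_db:
    cf = snapshot_db.get_column_family("users")
    with snapshot_db.begin_txn() as txn:
        value = txn.get(cf, b"user:1")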
Updating Runtime Configuration
cf = db.get_column_family("my_cf")
# Update runtime-safe configuration settings
new_config = tidesdb.default_column_family_config()
new_config.write_buffer_size = 256 * 1024 * 1024  # 256MB
new_config.bloom_fpr = 0.001  # 0.1% false positive rate
new_config.sync_mode = tidesdb.SyncMode.SYNC_FULL
# persist_to_disk=True saves to config.ini (default)
cf.update_runtime_config(new_config, persist_to_disk=True)
# Updatable settings (safe to change at runtime):
# -- write_buffer_size -- Memtable flush threshold
# -- skip_list_max_level -- Skip list level for new memtables
# -- skip_list_probability -- Skip list probability for new memtables
# -- bloom_fpr -- False positive rate for new SSTables
# -- index_sample_ratio -- Index sampling ratio for new SSTables
# -- sync_mode -- Durability mode
# -- sync_interval_us -- Sync interval in microseconds
# Save config to custom INI file
tidesdb.save_config_to_ini("custom_config.ini", "my_cf", new_config)

B+tree KLog Format (Optional)
Column families can optionally use a B+tree structure for the key log instead of the default block-based format. The B+tree klog format offers faster point lookups through O(log N) tree traversal.
config = tidesdb.default_column_family_config()
config.use_btree = True  # Enable B+tree klog format
db.create_column_family("btree_cf", config)

Characteristics
- Point lookups · O(log N) tree traversal with binary search at each node
- Range scans · Doubly-linked leaf nodes enable efficient bidirectional iteration
- Immutable · Tree is bulk-loaded from sorted memtable data during flush
- Compression · Nodes compress independently using the same algorithms
When to use B+tree klog format
- Read-heavy workloads with frequent point lookups
- Workloads where read latency is more important than write throughput
- Large SSTables where block scanning becomes expensive
Tradeoffs
- Slightly higher write amplification during flush
- Larger metadata overhead per node
- Block-based format may be faster for sequential scans
Compression Algorithms
config = tidesdb.default_column_family_config()
# Available algorithms:
config.compression_algorithm = tidesdb.CompressionAlgorithm.NO_COMPRESSION
config.compression_algorithm = tidesdb.CompressionAlgorithm.SNAPPY_COMPRESSION
config.compression_algorithm = tidesdb.CompressionAlgorithm.LZ4_COMPRESSION
config.compression_algorithm = tidesdb.CompressionAlgorithm.LZ4_FAST_COMPRESSION
config.compression_algorithm = tidesdb.CompressionAlgorithm.ZSTD_COMPRESSION

Sync Modes
config = tidesdb.default_column_family_config()
# SYNC_NONE: Fastest, least durable (OS handles flushing)
config.sync_mode = tidesdb.SyncMode.SYNC_NONE
# SYNC_INTERVAL: Balanced (periodic background syncing)
config.sync_mode = tidesdb.SyncMode.SYNC_INTERVAL
config.sync_interval_us = 128000  # 128ms
# SYNC_FULL: Most durable (fsync on every write)
config.sync_mode = tidesdb.SyncMode.SYNC_FULL

Log Levels
import tidesdb
# Available log levels
config = tidesdb.Config(
    db_path="./mydb",
    log_level=tidesdb.LogLevel.LOG_DEBUG,  # Detailed diagnostic info
    # log_level=tidesdb.LogLevel.LOG_INFO,  # General info (default)
    # log_level=tidesdb.LogLevel.LOG_WARN,  # Warnings only
    # log_level=tidesdb.LogLevel.LOG_ERROR,  # Errors only
    # log_level=tidesdb.LogLevel.LOG_FATAL,  # Critical errors only
    # log_level=tidesdb.LogLevel.LOG_NONE,  # Disable logging
    log_to_file=True,  # Write to ./mydb/LOG instead of stderr
    log_truncation_at=24 * 1024 * 1024,  # Truncate log file at 24MB (0 = no truncation)
)

Column Family Configuration Reference
All available configuration options for column families:
config = tidesdb.default_column_family_config()
# Memory and LSM structure
config.write_buffer_size = 64 * 1024 * 1024  # Memtable flush threshold (default: 64MB)
config.level_size_ratio = 10  # Level size multiplier (default: 10)
config.min_levels = 5  # Minimum LSM levels (default: 5)
config.dividing_level_offset = 2  # Compaction dividing level offset (default: 2)
# Skip list settings
config.skip_list_max_level = 12  # Skip list max level (default: 12)
config.skip_list_probability = 0.25  # Skip list probability (default: 0.25)
# Compression
config.compression_algorithm = tidesdb.CompressionAlgorithm.LZ4_COMPRESSION
# Bloom filter
config.enable_bloom_filter = True  # Enable bloom filters (default: True)
config.bloom_fpr = 0.01  # 1% false positive rate (default: 0.01)
# Block indexes
config.enable_block_indexes = True  # Enable block indexes (default: True)
config.index_sample_ratio = 1  # Sample every block (default: 1)
config.block_index_prefix_len = 16  # Block index prefix length (default: 16)
# Durability
config.sync_mode = tidesdb.SyncMode.SYNC_INTERVAL
config.sync_interval_us = 128000  # Sync interval in microseconds (default: 128ms)
# Key ordering
config.comparator_name = "memcmp"  # Comparator name (default: "memcmp")
# Value separation
config.klog_value_threshold = 512  # Values > threshold go to vlog (default: 512)
# Resource limits
config.min_disk_space = 100 * 1024 * 1024  # Minimum disk space required (default: 100MB)
# Transaction isolation
config.default_isolation_level = tidesdb.IsolationLevel.READ_COMMITTED
# Compaction triggers
config.l1_file_count_trigger = 4  # L1 file count trigger (default: 4)
config.l0_queue_stall_threshold = 20  # L0 queue stall threshold (default: 20)
# B+tree klog format (optional)
config.use_btree = False  # Use B+tree klog format (default: False)

Custom Comparators
TidesDB uses comparators to determine the sort order of keys. Built-in comparators are automatically registered:
"memcmp"(default): Binary byte-by-byte comparison"lexicographic": Null-terminated string comparison"uint64": Unsigned 64-bit integer comparison"int64": Signed 64-bit integer comparison"reverse": Reverse binary comparison"case_insensitive": Case-insensitive ASCII comparison
# Use a built-in comparator
config = tidesdb.default_column_family_config()
config.comparator_name = "reverse"  # Descending order
db.create_column_family("reverse_cf", config)
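Any built-in can be selected the same way; for instance, case-insensitive ordering of ASCII keys (a minimal sketch; the column family and keys are illustrative):

config = tidesdb.default_column_family_config()
config.comparator_name = "case_insensitive"
db.create_column_family("tags", config)
tags_cf = db.get_column_family("tags")

with db.begin_txn() as txn:
    txn.put(tags_cf, b"Alpha", b"1")
    txn.put(tags_cf, b"beta", b"2")
    txn.put(tags_cf, b"GAMMA", b"3")
    txn.commit()

# Iteration order ignores ASCII case: Alpha, beta, GAMMA
with db.begin_txn() as txn:
    with txn.new_iterator(tags_cf) as it:
        it.seek_to_first()
        for key, value in it:
            print(key.decode())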
# Register a custom comparator
def timestamp_desc_compare(key1: bytes, key2: bytes) -> int:
    """Compare 8-byte timestamps in descending order."""
    import struct

    if len(key1) != 8 or len(key2) != 8:
        # Fallback to memcmp for invalid sizes
        if key1 < key2:
            return -1
        elif key1 > key2:
            return 1
        return 0

    ts1 = struct.unpack("<Q", key1)[0]
    ts2 = struct.unpack("<Q", key2)[0]

    # Reverse order for newest-first
    if ts1 > ts2:
        return -1
    elif ts1 < ts2:
        return 1
    return 0
# Register before creating column families that use it
db.register_comparator("timestamp_desc", timestamp_desc_compare)
# Use the custom comparator
config = tidesdb.default_column_family_config()
config.comparator_name = "timestamp_desc"
db.create_column_family("events", config)

Error Handling
try:
    value = txn.get(cf, b"nonexistent_key")
except tidesdb.TidesDBError as e:
    print(f"Error: {e}")
    print(f"Error code: {e.code}")