Is there an ETS setting that prevents a table from being read while it’s being written to? If not, what practical measures can you recommend to avoid dirty reads?
I’m not aware of such a setting, but I can imagine some sort of semaphore/lock based on :atomics (available since OTP 21.2).
A naive implementation, with the :atomics reference stored in :persistent_term, could be:
defmodule Storage do
  @moduledoc """
  Naive ETS-backed storage whose reads wait while a write is in progress.

  The GenServer owns a named ETS table (named after this module) and performs
  all inserts. A one-slot `:atomics` array, published through
  `:persistent_term`, acts as a lock flag: the writer sets it before inserting
  and clears it afterwards, and `lookup/1` polls it before reading.

  Limitation: `lookup/1` checks the flag and reads the table in two separate
  steps, so a writer can still take the lock between the check and the read.
  The flag narrows the window for dirty reads; it does not close it.
  """

  use GenServer

  # Key under which the atomics reference is published in :persistent_term.
  @locker_key :storage_locker_ref
  # Number of slots in the atomics array, kept separate from the slot index.
  @atomics_size 1
  # Index (1-based) of the slot that holds the lock flag.
  @atomics_index 1
  @unlocked 0
  @locked 1

  # Client API

  @doc """
  Starts the storage process, registered under the module name.

  `args` is kept unchanged as the GenServer state.
  """
  @spec start_link(term()) :: GenServer.on_start()
  def start_link(args \\ []) do
    GenServer.start_link(__MODULE__, args, name: __MODULE__)
  end

  @doc """
  Returns the objects stored under `key`, as `:ets.lookup/2` does.

  While the lock flag is set, the caller sleeps 10 ms and tries again, so this
  call blocks until the writer clears the flag.
  """
  @spec lookup(term()) :: [tuple()]
  def lookup(key) do
    if unlocked?() do
      :ets.lookup(__MODULE__, key)
    else
      Process.sleep(10)
      lookup(key)
    end
  end

  @doc """
  Asynchronously inserts `data` into the table via the storage process.

  Returns `:ok` immediately; the insert itself happens in the GenServer.
  """
  @spec insert(tuple() | [tuple()]) :: :ok
  def insert(data) do
    GenServer.cast(__MODULE__, {:insert, data})
  end

  # Server callbacks

  @impl true
  def init(args) do
    atomic_ref = :atomics.new(@atomics_size, [])
    :persistent_term.put(@locker_key, atomic_ref)
    :ets.new(__MODULE__, [:named_table])
    {:ok, args}
  end

  @impl true
  def handle_cast({:insert, data}, state) do
    do_insert(data)
    {:noreply, state}
  end

  # Internals

  # Sets the lock flag, inserts, and always clears the flag again, even when
  # the insert raises, so readers polling in lookup/1 never see a stale lock.
  defp do_insert(data) do
    lock()
    IO.inspect("locked")

    try do
      Process.sleep(3000) # for testing purposes
      :ets.insert(__MODULE__, data)
    after
      IO.inspect("unlocked")
      unlock()
    end
  end

  defp unlock, do: :atomics.put(locker_ref(), @atomics_index, @unlocked)

  defp lock, do: :atomics.put(locker_ref(), @atomics_index, @locked)

  defp unlocked?, do: :atomics.get(locker_ref(), @atomics_index) == @unlocked

  # Fetches the atomics reference published by init/1.
  defp locker_ref, do: :persistent_term.get(@locker_key)
end
and we can:
iex(1)> Storage.start_link()
{:ok, #PID<0.208.0>}
iex(2)> Task.start(fn -> Storage.insert({:foo, :bar}) end)
{:ok, #PID<0.210.0>}
"locked"
iex(3)> Storage.lookup(:foo) # blocks the process until Storage is "unlocked"
"unlocked"
[foo: :bar]
defmodule ETSTableToList do
  @moduledoc """
  Dumps an ETS table to a list, retrying when the dump looks incomplete.

  `execute/1` records the table size, calls `:ets.tab2list/1`, and hands the
  result to `validate/3`, which retries when fewer objects came back than the
  recorded size. The size and the dump are read in separate calls, so other
  processes can still modify the table in between; the result is a best-effort
  snapshot, not a consistent one.
  """

  @doc """
  Returns `output` unless it has fewer elements than `table_size`, in which
  case the table is dumped again through `execute/1`.
  """
  @spec validate(list(), :ets.tab(), non_neg_integer()) :: list()
  def validate(output, table_name, table_size) when length(output) < table_size,
    do: execute(table_name)

  def validate(output, _table_name, _table_size), do: output

  @doc """
  Returns the number of objects currently stored in `table_name`.

  Asks ETS for the `:size` item directly, so the result does not depend on the
  position of `:size` in the list returned by `:ets.info/1`.

  Raises `ArgumentError` when the table does not exist.
  """
  @spec get_table_size(:ets.tab()) :: non_neg_integer()
  def get_table_size(table_name) do
    case :ets.info(table_name, :size) do
      :undefined ->
        raise ArgumentError, "ETS table #{inspect(table_name)} does not exist"

      table_size ->
        table_size
    end
  end

  @doc """
  Returns all objects of `table_name` as a list.

  The dump is retried when `:ets.tab2list/1` raises `ArgumentError` and when
  `validate/3` judges the result too short. Raises `ArgumentError` when the
  table does not exist.
  """
  @spec execute(:ets.tab()) :: list()
  def execute(table_name) do
    # Read outside the `try` on purpose: a missing table raises here and ends
    # the retry loop.
    table_size = get_table_size(table_name)

    try do
      :ets.tab2list(table_name)
    rescue
      # NOTE(review): this retry is unbounded while the table exists but
      # tab2list keeps raising; confirm that is acceptable for callers.
      ArgumentError -> execute(table_name)
    else
      output -> validate(output, table_name, table_size)
    end
  end
end
Do you see any pitfalls with this approach?
Do you see any pitfalls with this approach?
I don’t understand the problem you are trying to solve. But one thing to keep in mind is that in the execute/1 function, between or even “during” the calls to get_table_size/1, :ets.tab2list/1 and validate/3, the ETS table can be modified by other processes unless its owner process “protects” it from that…
I see your point. Without preventing dirty reads, race conditions will always be a problem.