Comments (6)
Nope, it looks like the async file libraries are just always slower. I've also tested aiofiles, and it's really slow as well. Well, that's disappointing.
import asyncio
import time
import random
import os
import sys
from uuid import uuid4
from aiofile import AIOFile, LineReader, Writer
from aiofiles import open as aio_open
# Number of files each benchmark variant writes and then reads back.
_files = 10
# Numbers written to every file; the driver loop at the bottom of the script
# multiplies this by 10 after each round.
_iters = 10 ** 4
# Exclusive upper bound for the random integers, and the length of the
# frequency tables that are indexed by those integers.
_rand_max = 10
def read_sync(fname):
    """Read ``fname`` with blocking I/O and tally its numbers negatively.

    Every line is stripped and parsed as an integer, and the slot for that
    integer is decremented.  Adding the result to the table returned by the
    matching writer therefore gives all zeros when the file round-trips.

    Returns:
        A list of ``_rand_max`` counters, each zero or negative.
    """
    tally = [0] * _rand_max
    with open(fname, "r") as handle:
        for value in map(int, map(str.strip, handle)):
            tally[value] -= 1
    return tally
def write_sync(fname):
    """Write ``_iters`` random integers to ``fname`` with blocking I/O.

    One integer in ``[0, _rand_max)`` is written per line, each with its own
    ``write`` call, and the occurrences of every value are counted.

    Returns:
        A list of ``_rand_max`` counters holding how often each value was
        written.
    """
    tally = [0] * _rand_max
    with open(fname, "w") as handle:
        for _ in range(_iters):
            value = random.randrange(_rand_max)
            tally[value] += 1
            handle.write(f"{value}\n")
    return tally
def test_sync():
    """Run the blocking write-then-read benchmark over ``_files`` files.

    All files are written first, then all are read back, one after another.

    Returns:
        A dict mapping each generated file name to the element-wise sum of
        its write and read tables (all zeros when the data round-trips).
    """
    names = [f"{uuid4()}.txt" for _ in range(_files)]
    written = {name: write_sync(name) for name in names}
    read_back = {name: read_sync(name) for name in names}

    residuals = {}
    for name in names:
        residuals[name] = [
            w + r for w, r in zip(written[name], read_back[name])
        ]
    return residuals
async def read_aiofile(fname):
    """Read ``fname`` through ``aiofile`` and tally its numbers negatively.

    Lines are pulled from an ``AIOFile`` via ``LineReader``; each one is
    stripped, parsed as an integer, and the slot for that integer is
    decremented.

    The table is sized with ``_rand_max`` (rather than a literal ``10``) so
    this reader stays in step with ``read_sync`` if the constant changes.

    Returns:
        A list of ``_rand_max`` counters, each zero or negative.
    """
    freqs = [0] * _rand_max
    async with AIOFile(fname, "r") as fp:
        async for line in LineReader(fp):
            freqs[int(line.strip())] -= 1
    return freqs
async def write_aiofile(fname):
    """Write ``_iters`` random integers to ``fname`` through ``aiofile``.

    One integer in ``[0, _rand_max)`` is written per line, each with its own
    awaited ``Writer`` call, and the occurrences of every value are counted.

    The range and table size come from ``_rand_max`` (rather than a literal
    ``10``) so this writer stays in step with ``write_sync``.

    Returns:
        A list of ``_rand_max`` counters holding how often each value was
        written.
    """
    freqs = [0] * _rand_max
    async with AIOFile(fname, "w") as fp:
        write = Writer(fp)
        for _ in range(_iters):
            num = random.randrange(0, _rand_max)
            freqs[num] += 1
            await write(f"{num}\n")
    return freqs
async def read_aiofiles(fname):
    """Read ``fname`` through ``aiofiles`` and tally its numbers negatively.

    The file is opened with ``aiofiles.open`` (imported as ``aio_open``) and
    iterated asynchronously line by line; each line is stripped, parsed as an
    integer, and the slot for that integer is decremented.

    The table is sized with ``_rand_max`` (rather than a literal ``10``) so
    this reader stays in step with ``read_sync`` if the constant changes.

    Returns:
        A list of ``_rand_max`` counters, each zero or negative.
    """
    freqs = [0] * _rand_max
    async with aio_open(fname, "r") as fp:
        async for line in fp:
            freqs[int(line.strip())] -= 1
    return freqs
async def write_aiofiles(fname):
    """Write ``_iters`` random integers to ``fname`` through ``aiofiles``.

    One integer in ``[0, _rand_max)`` is written per line, each with its own
    awaited ``write`` call, and the occurrences of every value are counted.

    The range and table size come from ``_rand_max`` (rather than a literal
    ``10``) so this writer stays in step with ``write_sync``.

    Returns:
        A list of ``_rand_max`` counters holding how often each value was
        written.
    """
    freqs = [0] * _rand_max
    async with aio_open(fname, "w") as fp:
        for _ in range(_iters):
            num = random.randrange(0, _rand_max)
            freqs[num] += 1
            await fp.write(f"{num}\n")
    return freqs
async def test_async(reader, writer):
    """Run an async write-then-read benchmark over ``_files`` files.

    All ``writer`` coroutines are gathered concurrently; once they finish,
    all ``reader`` coroutines are gathered concurrently.

    Args:
        reader: Coroutine function taking a file name and returning a
            frequency table.
        writer: Coroutine function taking a file name and returning a
            frequency table.

    Returns:
        A dict mapping each generated file name to the element-wise sum of
        its write and read tables.
    """
    names = [f"{uuid4()}.txt" for _ in range(_files)]
    written = await asyncio.gather(*[writer(name) for name in names])
    read_back = await asyncio.gather(*[reader(name) for name in names])
    # gather() returns results in argument order, so the three sequences
    # line up by position.
    return {
        name: [w + r for w, r in zip(w_table, r_table)]
        for name, w_table, r_table in zip(names, written, read_back)
    }
async def test_executor():
    """Run the whole blocking benchmark (``test_sync``) in the default executor.

    ``asyncio.get_running_loop()`` is used instead of ``get_event_loop()``:
    this code always executes inside a running loop, and the asyncio
    documentation recommends the former in coroutines.

    Returns:
        Whatever ``test_sync`` returns.
    """
    loop = asyncio.get_running_loop()
    # ``None`` selects the loop's default executor.
    return await loop.run_in_executor(None, test_sync)
async def test_multi_job_executor():
    """Run ``test_executor`` alongside a coroutine that keeps the loop busy.

    A helper coroutine prints the current time to stderr every 10 ms while
    the executor job runs.  As soon as either task completes, the remaining
    one is cancelled and the completed task's result is returned.

    Returns:
        The result of whichever task finished first; the helper loops
        forever, so in practice this is the ``test_executor`` result.
    """

    async def print_and_sleep():
        # Endless ticker: only stops when its task is cancelled below.
        while True:
            print(time.time(), file=sys.stderr)
            await asyncio.sleep(0.01)

    job = asyncio.ensure_future(test_executor())
    ticker = asyncio.ensure_future(print_and_sleep())
    done, pending = await asyncio.wait(
        (job, ticker),
        return_when=asyncio.FIRST_COMPLETED,
    )
    for task in pending:
        task.cancel()
    finished = next(iter(done))
    return await finished
def test_sync_one():
    """Write and then read back a single file with blocking I/O.

    Returns:
        A ``(file name, residual)`` tuple, where ``residual`` is the
        element-wise sum of the write and read tables.
    """
    name = f"{uuid4()}.txt"
    written = write_sync(name)
    read_back = read_sync(name)
    residual = [w + r for w, r in zip(written, read_back)]
    return name, residual
async def test_executor_parallel():
    """Submit one ``test_sync_one`` job per file to the default executor.

    ``_files`` jobs are submitted at once and gathered.

    ``asyncio.get_running_loop()`` is used instead of ``get_event_loop()``:
    this code always executes inside a running loop, and the asyncio
    documentation recommends the former in coroutines.

    Returns:
        A dict built from the ``(file name, residual)`` pairs returned by the
        jobs.
    """
    loop = asyncio.get_running_loop()
    # ``None`` selects the loop's default executor.
    jobs = [loop.run_in_executor(None, test_sync_one) for _ in range(_files)]
    return dict(await asyncio.gather(*jobs))
async def time_coroutine(co):
    """Await ``co``, print how long it took in seconds, and return its result.

    Args:
        co: Any awaitable.

    Returns:
        The value produced by awaiting ``co``.
    """
    started = time.perf_counter()
    result = await co
    elapsed = time.perf_counter() - started
    print(elapsed)
    return result
def time_callable(cb):
    """Call ``cb()``, print how long it took in seconds, and return its result.

    Args:
        cb: A zero-argument callable.

    Returns:
        Whatever ``cb`` returns.
    """
    started = time.perf_counter()
    result = cb()
    elapsed = time.perf_counter() - started
    print(elapsed)
    return result
def check(freqs, name):
    """Report whether a benchmark round-tripped, then delete its files.

    Prints ``name`` followed by ``passed`` when every counter in every table
    is zero, otherwise ``failed``.  Every key of ``freqs`` is then removed
    from disk with ``os.remove``.

    Args:
        freqs: Dict mapping file names to lists of counters.
        name: Label printed in front of the verdict.
    """
    has_residue = any(
        value != 0 for table in freqs.values() for value in table
    )
    verdict = "failed" if has_residue else "passed"
    print(name, verdict)

    for path in freqs:
        os.remove(path)
async def run_async_tests():
    """Time every async variant from inside one already-running event loop.

    The variants run one after another in a fixed order; each is timed with
    ``time_coroutine`` and its result is passed to ``check`` with its label.
    """
    scenarios = (
        ("async (executor)", test_executor),
        ("async (executor w/ simultaneous coroutines)", test_multi_job_executor),
        ("async (multiple executors)", test_executor_parallel),
        ("async (aiofiles)", lambda: test_async(read_aiofiles, write_aiofiles)),
        ("async (aiofile)", lambda: test_async(read_aiofile, write_aiofile)),
    )
    for label, make_coro in scenarios:
        outcome = await time_coroutine(make_coro())
        check(outcome, label)
# Benchmark driver: repeat the whole suite with ten times more numbers per
# file each round, until _iters exceeds 10 ** 8.
while _iters <= 10 ** 8:
    print("with", _iters, "numbers")

    # synchronous code as a baseline
    freqs = time_callable(test_sync)
    check(freqs, "sync")

    # Each async variant below gets a fresh event loop via asyncio.run().
    scenarios = (
        # run sync in executor – the "dumb way"
        ("async (executor)", test_executor),
        # make sure that async actually picks up the thread while i/o is
        # happening
        ("async (executor w/ simultaneous coroutines)", test_multi_job_executor),
        # do multiple file i/o sequences in parallel
        ("async (multiple executors)", test_executor_parallel),
        # test Tinche/aiofiles
        ("async (aiofiles)", lambda: test_async(read_aiofiles, write_aiofiles)),
        # test mosquito/aiofile
        ("async (aiofile)", lambda: test_async(read_aiofile, write_aiofile)),
    )
    for label, make_coro in scenarios:
        freqs = time_callable(lambda: asyncio.run(make_coro()))
        check(freqs, label)

    # Same async variants again, this time sharing a single event loop.
    asyncio.run(run_async_tests())

    _iters *= 10
$ py -VV
Python 3.7.3 (default, Mar 28 2019, 10:27:50)
[Clang 10.0.1 (clang-1001.0.46.3)] on darwin
$ sysctl -n machdep.cpu.brand_string
Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
$ system_profiler SPSoftwareDataType | grep macOS
System Version: macOS 10.14.5 (18F132)
(I only did 1 run so this is probably only accurate to within 100 ms)
| | sync | executor | executor + busy | multiple executors | Tinche/aiofiles | mosquito/aiofile |
---|---|---|---|---|---|---|
1e4 | 232.4 ms | 229.8 ms | 248.5 ms | 245.1 ms | 12.56 s | 6.721 s |
1e4 (in task) | – | 230.4 ms | 249.5 ms | 245.0 ms | 12.77 s | 6.883 s |
1e5 | 2.260 s | 2.241 s | 2.263 s | 2.372 s | 122.0 s | 169.7 s |
1e5 (in task) | – | 2.475 s | 2.480 s | 2.310 s | 122.7 s | 165.0 s |
1e6 | 22.40 s | 22.37 s | 22.68 s | 23.54 s | 1193.9 s | – |
1e6 (in task) | – | 22.98 s | 24.25 s | 23.76 s | – | – |
I didn't complete the 1e6 run because I think we get the point.
It looks like the asyncio overhead is not the main problem, but rather the thread pool. As far as I know, there's no environment variable to expand the thread pool size, like UV_THREADPOOL_SIZE for libuv. Basically, your best bet is to simply run the sync version in an executor.
from aiofile.
My tests on a really fast NVMe Linux server are below.
from aiofile.
After a few months of development, caio has been released. caio has several implementations of low-level interfaces to asynchronous file IO (Linux-specific libaio, a C thread pool, and a pure-Python thread-pool-based one).
The aio.h-based implementation has been dropped, and aiofile now uses caio under the hood.
from aiofile.
@xsduan sorry about that, but this aiofile implementation uses a while loop to poll for the aio result, which blocks the CPU on Darwin (a.k.a. OS X).
from aiofile.
Will you make a release of a version using caio?
from aiofile.
Is now aiofile>=2 using caio
from aiofile.
Related Issues (20)
- Can't install on Amazon Linux HOT 2
- Function not implemented HOT 1
- async_open does not create file if file does not exist in mode 'a+' HOT 1
- Add support for StringIO and BytesIO HOT 1
- Can I use aiofile.async_open without a with statement? HOT 4
- Race condition in `AIOFile.open()` HOT 4
- Some sort of changelog / release notes?
- async_open doesn't fully mimic the behavior of Python file objects HOT 5
- Manual context management example on readme has a bug HOT 1
- How to work with NFS? HOT 1
- Tag the source HOT 1
- AIOFile context manager looses data HOT 3
- aiofile LineReader does a read for every line in spite of having multiple lines in CHUNK_SIZE HOT 2
- Which of the methods are coroutine-safe? HOT 2
- Why is fsync calling fdsync HOT 2
- No flush API and no way to set unbuffered writes in binary mode HOT 3
- Add performance benchmarks to the readme HOT 3
- Unable to call close() twice
- Memory leak during import
- TextFileWrapper.read reads more than requested
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from aiofile.