Git Product home page Git Product logo

Comments (6)

xsduan avatar xsduan commented on May 27, 2024 7

nope, it looks like the async files are just always slower. I've also tested aiofiles, and it's just really slow. Well that's disappointing.

import asyncio
import time
import random
import os
import sys
from uuid import uuid4

from aiofile import AIOFile, LineReader, Writer
from aiofiles import open as aio_open


# Benchmark parameters read by the test functions below.
# Number of uniquely named files created per test run.
_files = 10
# Numbers written to each file; the driver loop at the bottom of the
# script multiplies this by 10 after every pass.
_iters = 10 ** 4
# Exclusive upper bound of the random numbers written by write_sync, and
# the length of the frequency lists built by read_sync / write_sync.
_rand_max = 10


def read_sync(fname):
    """Tally the numbers stored in *fname* using blocking file I/O.

    Every line is parsed as an integer and the slot with that index is
    decremented, so adding the result to the list returned by
    ``write_sync`` for the same file gives all zeros when the data
    round-trips.

    Args:
        fname: Path of a text file holding one integer per line.

    Returns:
        A list of ``_rand_max`` counts, each zero or negative.
    """
    tally = [0 for _ in range(_rand_max)]
    with open(fname, "r") as handle:
        for raw in handle:
            index = int(raw.strip())
            tally[index] = tally[index] - 1
    return tally


def write_sync(fname):
    """Fill *fname* with ``_iters`` random numbers using blocking file I/O.

    Args:
        fname: Path of the text file to create or overwrite.

    Returns:
        A list of ``_rand_max`` counts recording how many times each
        number was written.
    """
    tally = [0 for _ in range(_rand_max)]
    with open(fname, "w") as handle:
        remaining = _iters
        while remaining > 0:
            value = random.randrange(_rand_max)
            tally[value] = tally[value] + 1
            handle.write(f"{value}\n")
            remaining -= 1
    return tally


def test_sync():
    """Write and then re-read ``_files`` files with plain blocking I/O.

    All files are written first, then all are read back, one after the
    other.

    Returns:
        A dict mapping each generated file name to the element-wise sum
        of its write counts and read counts.
    """
    fnames = [f"{uuid4()}.txt" for _ in range(_files)]

    # Finish every write before starting the first read.
    written = [write_sync(path) for path in fnames]
    seen = [read_sync(path) for path in fnames]

    combined = {}
    for path, plus, minus in zip(fnames, written, seen):
        combined[path] = [a + b for a, b in zip(plus, minus)]
    return combined


async def read_aiofile(fname):
    """Tally the numbers stored in *fname*, reading via ``aiofile``.

    Each line produced by ``LineReader`` is parsed as an integer and the
    slot with that index is decremented, mirroring ``read_sync``.

    Args:
        fname: Path of a text file holding one integer per line.

    Returns:
        A list of ``_rand_max`` counts, each zero or negative.
    """
    # Size the histogram from the shared constant instead of a literal 10
    # so it cannot drift apart from the sync implementation.
    freqs = [0] * _rand_max
    async with AIOFile(fname, "r") as fp:
        async for line in LineReader(fp):
            freqs[int(line.strip())] -= 1
    return freqs


async def write_aiofile(fname):
    """Fill *fname* with ``_iters`` random numbers, writing via ``aiofile``.

    Args:
        fname: Path of the text file to create or overwrite.

    Returns:
        A list of ``_rand_max`` counts recording how many times each
        number was written.
    """
    # Use the shared constant for both the histogram size and the random
    # range instead of a literal 10, matching write_sync.
    freqs = [0] * _rand_max
    async with AIOFile(fname, "w") as fp:
        write = Writer(fp)
        for _ in range(_iters):
            num = random.randrange(_rand_max)
            freqs[num] += 1
            # One awaited write per number, as in the original benchmark.
            await write(f"{num}\n")
    return freqs


async def read_aiofiles(fname):
    """Tally the numbers stored in *fname*, reading via ``aiofiles``.

    Each line is parsed as an integer and the slot with that index is
    decremented, mirroring ``read_sync``.

    Args:
        fname: Path of a text file holding one integer per line.

    Returns:
        A list of ``_rand_max`` counts, each zero or negative.
    """
    # Size the histogram from the shared constant instead of a literal 10
    # so it cannot drift apart from the sync implementation.
    freqs = [0] * _rand_max
    async with aio_open(fname, "r") as fp:
        async for line in fp:
            freqs[int(line.strip())] -= 1
    return freqs


async def write_aiofiles(fname):
    """Fill *fname* with ``_iters`` random numbers, writing via ``aiofiles``.

    Args:
        fname: Path of the text file to create or overwrite.

    Returns:
        A list of ``_rand_max`` counts recording how many times each
        number was written.
    """
    # Use the shared constant for both the histogram size and the random
    # range instead of a literal 10, matching write_sync.
    freqs = [0] * _rand_max
    async with aio_open(fname, "w") as fp:
        for _ in range(_iters):
            num = random.randrange(_rand_max)
            freqs[num] += 1
            # One awaited write per number, as in the original benchmark.
            await fp.write(f"{num}\n")
    return freqs


async def test_async(reader, writer):
    """Write and then re-read ``_files`` files with the given coroutines.

    All writer coroutines are gathered together first; the reader
    coroutines are gathered only after every write has finished.

    Args:
        reader: Coroutine function taking a file name and returning a
            list of read counts.
        writer: Coroutine function taking a file name and returning a
            list of write counts.

    Returns:
        A dict mapping each generated file name to the element-wise sum
        of its write counts and read counts.
    """
    fnames = [f"{uuid4()}.txt" for _ in range(_files)]

    written = await asyncio.gather(*[writer(path) for path in fnames])
    seen = await asyncio.gather(*[reader(path) for path in fnames])

    combined = {}
    for path, plus, minus in zip(fnames, written, seen):
        combined[path] = [a + b for a, b in zip(plus, minus)]
    return combined


async def test_executor():
    """Run the whole blocking ``test_sync`` benchmark in the default executor.

    Returns:
        Whatever ``test_sync`` returns, once the executor job finishes.
    """
    # Inside a coroutine the running loop is the one we want;
    # get_running_loop() returns it directly and never creates a new loop.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, test_sync)


async def test_multi_job_executor():
    """Run ``test_executor`` alongside a coroutine that keeps printing.

    A second task prints the current time to stderr every 0.01 s, which
    shows whether the event loop keeps running while the executor job is
    in progress. The function returns as soon as either task completes
    and cancels whatever is still pending.

    Returns:
        The result of the first completed task.
    """

    async def ticker():
        while True:
            print(time.time(), file=sys.stderr)
            await asyncio.sleep(0.01)

    benchmark_task = asyncio.ensure_future(test_executor())
    ticker_task = asyncio.ensure_future(ticker())

    finished, unfinished = await asyncio.wait(
        (benchmark_task, ticker_task),
        return_when=asyncio.FIRST_COMPLETED,
    )
    for task in unfinished:
        task.cancel()

    first_done = list(finished)[0]
    return await first_done


def test_sync_one():
    """Write and re-read a single file with blocking I/O.

    Returns:
        A ``(file name, balance)`` tuple, where balance is the
        element-wise sum of the write counts and the read counts.
    """
    path = f"{uuid4()}.txt"
    plus = write_sync(path)
    minus = read_sync(path)
    balance = []
    for a, b in zip(plus, minus):
        balance.append(a + b)
    return (path, balance)


async def test_executor_parallel():
    """Run ``_files`` independent ``test_sync_one`` jobs in the default executor.

    Returns:
        A dict built from the ``(file name, balance)`` tuples returned
        by the jobs.
    """
    # Inside a coroutine the running loop is the one we want;
    # get_running_loop() returns it directly and never creates a new loop.
    loop = asyncio.get_running_loop()
    jobs = [loop.run_in_executor(None, test_sync_one) for _ in range(_files)]
    return dict(await asyncio.gather(*jobs))


async def time_coroutine(co):
    """Await *co*, print how long it took in seconds, and return its result.

    Args:
        co: The awaitable to time.

    Returns:
        The value produced by awaiting *co*.
    """
    started = time.perf_counter()
    result = await co
    elapsed = time.perf_counter() - started
    print(elapsed)
    return result


def time_callable(cb):
    """Call *cb* with no arguments, print the elapsed seconds, return its result.

    Args:
        cb: A zero-argument callable.

    Returns:
        Whatever ``cb()`` returns.
    """
    started = time.perf_counter()
    result = cb()
    elapsed = time.perf_counter() - started
    print(elapsed)
    return result


def check(freqs, name):
    """Report whether every count is zero, then delete the benchmark files.

    Args:
        freqs: Dict mapping file names to lists of counts.
        name: Label printed in front of the verdict.
    """
    verdict = "passed"
    for counts in freqs.values():
        if not all(v == 0 for v in counts):
            verdict = "failed"
            break
    print(name, verdict)

    # The dict keys are the file names created by the test run.
    for path in freqs.keys():
        os.remove(path)


async def run_async_tests():
    """Run every async benchmark variant in turn inside one event loop.

    Each variant is timed with ``time_coroutine`` and its result is
    passed to ``check`` under the variant's label.
    """
    # (label, coroutine factory) pairs, in execution order. The factories
    # are called only when their turn comes, so each coroutine is created
    # just before it is timed.
    variants = (
        ("async (executor)", test_executor),
        ("async (executor w/ simultaneous coroutines)", test_multi_job_executor),
        ("async (multiple executors)", test_executor_parallel),
        ("async (aiofiles)", lambda: test_async(read_aiofiles, write_aiofiles)),
        ("async (aiofile)", lambda: test_async(read_aiofile, write_aiofile)),
    )
    for label, make_coro in variants:
        result = await time_coroutine(make_coro())
        check(result, label)


# (label, zero-argument runner) pairs, in execution order. Every async
# runner starts its own event loop through asyncio.run().
_standalone_runs = (
    # synchronous code as a baseline
    ("sync", test_sync),
    # run sync in executor – the "dumb way"
    ("async (executor)", lambda: asyncio.run(test_executor())),
    # make sure that async actually picks up the thread while i/o is happening
    (
        "async (executor w/ simultaneous coroutines)",
        lambda: asyncio.run(test_multi_job_executor()),
    ),
    # do multiple file i/o sequences in parallel
    ("async (multiple executors)", lambda: asyncio.run(test_executor_parallel())),
    # test Tinche/aiofiles
    (
        "async (aiofiles)",
        lambda: asyncio.run(test_async(read_aiofiles, write_aiofiles)),
    ),
    # test mosquito/aiofile
    (
        "async (aiofile)",
        lambda: asyncio.run(test_async(read_aiofile, write_aiofile)),
    ),
)

while _iters <= 10 ** 8:
    print("with", _iters, "numbers")

    for _label, _runner in _standalone_runs:
        freqs = time_callable(_runner)
        check(freqs, _label)

    # Repeat the async variants as tasks inside one shared event loop.
    asyncio.run(run_async_tests())

    # Ten times as many numbers per file on the next pass.
    _iters *= 10
$ py -VV
Python 3.7.3 (default, Mar 28 2019, 10:27:50) 
[Clang 10.0.1 (clang-1001.0.46.3)] on darwin
$ sysctl -n machdep.cpu.brand_string
Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
$ system_profiler SPSoftwareDataType | grep macOS
      System Version: macOS 10.14.5 (18F132)

(I only did 1 run so this is probably only accurate to within 100 ms)

sync executor executor + busy multiple executors Tinche/aiofiles mosquito/aiofile
1e4 232.4 ms 229.8 ms 248.5 ms 245.1 ms 12.56 s 6.721 s
1e4 (in task) 230.4 ms 249.5 ms 245.0 ms 12.77 s 6.883 s
1e5 2.260 s 2.241 s 2.263 s 2.372 s 122.0 s 169.7 s
1e5 (in task) 2.475 s 2.480 s 2.310 s 122.7 s 165.0 s
1e6 22.40 s 22.37 s 22.68 s 23.54 s 1193.9 s
1e6 (in task) 22.98 s 24.25 s 23.76 s

I didn't complete the 1e6 run because I think we get the point.

It looks like the asyncio overhead is not the main problem, but rather the threadpool. As far as I know, there's no environment variable to expand the threadpool size like UV_THREADPOOL_SIZE for libuv. Basically, your best bet is to simply run the sync version in an executor.

from aiofile.

mosquito avatar mosquito commented on May 27, 2024 7

My tests on a really fast NVMe Linux server are below.

photo_2020-06-23 15 04 58

from aiofile.

mosquito avatar mosquito commented on May 27, 2024 3

After a few months of development, caio has been released. caio has several implementations of low-level interfaces to asynchronous file I/O (Linux-specific libaio, a C thread pool, and a pure-Python thread-pool-based one).

The aio.h-based implementation has been dropped, and aiofile now uses caio under the hood.

from aiofile.

kazimsarikaya avatar kazimsarikaya commented on May 27, 2024

@xsduan sorry about that, but this aiofile implementation uses a while loop to poll for the AIO result, which blocks the CPU on Darwin (a.k.a. macOS).

from aiofile.

funkyHat avatar funkyHat commented on May 27, 2024

Will you make a release of a version using caio?

from aiofile.

mosquito avatar mosquito commented on May 27, 2024

aiofile>=2 now uses caio.

from aiofile.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.