Git Product home page Git Product logo

go-reader's Introduction

go-reader

There are many interfaces for reading files. This one is ours. It returns io.ReadSeekCloser instances.

Documentation

Go Reference

Example

Readers are instantiated with the reader.NewReader method which takes as its arguments a context.Context instance and a URI string. The URI's scheme represents the type of reader it implements and the remaining (URI) properties are used by that reader type to instantiate itself.

For example to read files from a directory on the local filesystem you would write:

package main

import (
	"context"
	"github.com/whosonfirst/go-reader"
	"io"
	"os"
)

// main reads example.txt from the /usr/local/data directory on the local
// filesystem and copies its contents to STDOUT. It panics on any error.
func main() {
	ctx := context.Background()

	r, err := reader.NewReader(ctx, "file:///usr/local/data")
	if err != nil {
		// r may be nil when construction fails, so stop before calling r.Read.
		panic(err)
	}

	fh, err := r.Read(ctx, "example.txt")
	if err != nil {
		// fh may be nil when the read fails; deferring fh.Close() would then panic.
		panic(err)
	}
	defer fh.Close()

	if _, err := io.Copy(os.Stdout, fh); err != nil {
		panic(err)
	}
}

There is also a handy "null" reader in case you need a "pretend" reader that doesn't actually do anything:

package main

import (
	"context"
	"github.com/whosonfirst/go-reader"
	"io"
	"os"
)

// main exercises the "null" reader: it asks for example.txt and copies
// whatever the reader hands back to STDOUT. It panics on any error.
func main() {
	ctx := context.Background()

	r, err := reader.NewReader(ctx, "null://")
	if err != nil {
		// r may be nil when construction fails, so stop before calling r.Read.
		panic(err)
	}

	fh, err := r.Read(ctx, "example.txt")
	if err != nil {
		// fh may be nil when the read fails; deferring fh.Close() would then panic.
		panic(err)
	}
	defer fh.Close()

	if _, err := io.Copy(os.Stdout, fh); err != nil {
		panic(err)
	}
}

Interfaces

reader.Reader

// Reader is the interface that every reader type implements.
type Reader interface {
	// Read returns the content identified by the string argument as an
	// io.ReadSeekCloser, or an error.
	Read(context.Context, string) (io.ReadSeekCloser, error)
	// ReaderURI returns a string derived from the string argument.
	// NOTE(review): presumably the URI as this reader resolves it — the HTTP
	// example returns the argument unchanged; confirm against implementations.
	ReaderURI(context.Context, string) string
}

Custom readers

Custom readers need to:

  1. Implement the interface above.
  2. Announce their availability using the go-reader.RegisterReader method on initialization, passing in an initialization function implementing the go-reader.ReaderInitializationFunc interface.

For example, this is how the go-reader-http reader is implemented:

package reader

import (
	"context"
	"errors"
	wof_reader "github.com/whosonfirst/go-reader"
	"github.com/whosonfirst/go-ioutil"
	"io"
	_ "log"
	"net/http"
	"net/url"
	"path/filepath"
	"time"
)

// HTTPReader is a wof_reader.Reader implementation that reads files from an
// HTTP(S) endpoint.
type HTTPReader struct {
	wof_reader.Reader
	// url is the base URL, parsed from the URI passed to NewHTTPReader, that
	// paths given to Read are joined onto.
	url      *url.URL
	// throttle is received from once at the start of every Read call, which
	// paces requests to the tick rate chosen in NewHTTPReader.
	throttle <-chan time.Time
}

// init registers NewHTTPReader as the initialization function for both the
// "http" and "https" URI schemes. It panics if either registration fails.
func init() {
	ctx := context.Background()

	for _, scheme := range []string{"http", "https"} {
		if err := wof_reader.RegisterReader(ctx, scheme, NewHTTPReader); err != nil {
			panic(err)
		}
	}
}

// NewHTTPReader returns an HTTPReader whose base URL is parsed from uri.
// The reader's throttle channel ticks three times per second. An error is
// returned only when uri cannot be parsed.
func NewHTTPReader(ctx context.Context, uri string) (wof_reader.Reader, error) {
	base, err := url.Parse(uri)
	if err != nil {
		return nil, err
	}

	// One tick every third of a second.
	const interval = time.Second / 3

	return &HTTPReader{
		url:      base,
		throttle: time.Tick(interval),
	}, nil
}

// Read fetches uri, joined onto the reader's base URL, with an HTTP GET and
// returns the response body wrapped as an io.ReadSeekCloser.
//
// The call waits for the next throttle tick before issuing the request and
// returns ctx.Err() if ctx is done first. The request itself is bound to ctx.
// Any response status other than 200 is reported as an error whose text is
// the response status line. Callers should close the returned value.
func (r *HTTPReader) Read(ctx context.Context, uri string) (io.ReadSeekCloser, error) {

	// Honour cancellation while waiting for our turn.
	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	case <-r.throttle:
	}

	// Work on a copy so the reader's base URL is never modified.
	u := *r.url

	// filepath.Join uses the OS path separator; URL paths always use forward
	// slashes, so convert back before building the request URL.
	u.Path = filepath.ToSlash(filepath.Join(u.Path, uri))

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)

	if err != nil {
		return nil, err
	}

	rsp, err := http.DefaultClient.Do(req)

	if err != nil {
		return nil, err
	}

	if rsp.StatusCode != http.StatusOK {
		// The body must be closed even when it is not going to be read.
		rsp.Body.Close()
		return nil, errors.New(rsp.Status)
	}

	fh, err := ioutil.NewReadSeekCloser(rsp.Body)

	if err != nil {
		// Nothing is handed back to the caller, so release the body here.
		rsp.Body.Close()
		return nil, err
	}

	return fh, nil
}

// ReaderURI returns uri unchanged; ctx is not used.
func (r *HTTPReader) ReaderURI(ctx context.Context, uri string) string {
	return uri
}

And then to use it you would do this:

package main

import (
	"context"
	"github.com/whosonfirst/go-reader"
	_ "github.com/whosonfirst/go-reader-http"	
	"io"
	"os"
)

// main reads 101/736/545/101736545.geojson from https://data.whosonfirst.org
// using the HTTP reader and copies it to STDOUT. It panics on any error.
func main() {
	ctx := context.Background()

	r, err := reader.NewReader(ctx, "https://data.whosonfirst.org")
	if err != nil {
		// r may be nil when construction fails, so stop before calling r.Read.
		panic(err)
	}

	fh, err := r.Read(ctx, "101/736/545/101736545.geojson")
	if err != nil {
		// fh may be nil when the read fails; deferring fh.Close() would then panic.
		panic(err)
	}
	defer fh.Close()

	if _, err := io.Copy(os.Stdout, fh); err != nil {
		panic(err)
	}
}

Available readers

"blob"

Read files from any registered Go Cloud Blob source. For example:

import (
	"context"
	"github.com/whosonfirst/go-reader"
	_ "github.com/whosonfirst/go-reader-blob"
	_ "gocloud.dev/blob/s3blob"	
)

// main creates a reader for an S3 bucket through the Go Cloud blob driver.
// It panics if the reader cannot be created.
func main() {
	ctx := context.Background()

	r, err := reader.NewReader(ctx, "s3://whosonfirst-data?region=us-west-1")
	if err != nil {
		panic(err)
	}

	// r is ready to use, for example r.Read(ctx, path). The blank assignment
	// keeps this snippet compilable, since Go rejects unused variables.
	_ = r
}

github://

Read files from a GitHub repository.

import (
	"context"
	"github.com/whosonfirst/go-reader"
	_ "github.com/whosonfirst/go-reader-github"
)

// main creates a reader for a GitHub repository.
// It panics if the reader cannot be created.
func main() {
	ctx := context.Background()

	r, err := reader.NewReader(ctx, "github://{GITHUB_OWNER}/{GITHUB_REPO}")
	if err != nil {
		panic(err)
	}

	// to specify a specific branch you would do this:
	// r, err := reader.NewReader(ctx, "github://{GITHUB_OWNER}/{GITHUB_REPO}?branch={GITHUB_BRANCH}")

	// r is ready to use, for example r.Read(ctx, path). The blank assignment
	// keeps this snippet compilable, since Go rejects unused variables.
	_ = r
}

githubapi://

Read files from a GitHub repository using the GitHub API.

import (
	"context"
	"github.com/whosonfirst/go-reader"
	_ "github.com/whosonfirst/go-reader-github"
)

// main creates a reader for a GitHub repository that is accessed through the
// GitHub API. It panics if the reader cannot be created.
func main() {
	ctx := context.Background()

	r, err := reader.NewReader(ctx, "githubapi://{GITHUB_OWNER}/{GITHUB_REPO}?access_token={GITHUBAPI_ACCESS_TOKEN}")
	if err != nil {
		panic(err)
	}

	// to specify a specific branch you would do this:
	// r, err := reader.NewReader(ctx, "githubapi://{GITHUB_OWNER}/{GITHUB_REPO}/?branch={GITHUB_BRANCH}&access_token={GITHUBAPI_ACCESS_TOKEN}")

	// r is ready to use, for example r.Read(ctx, path). The blank assignment
	// keeps this snippet compilable, since Go rejects unused variables.
	_ = r
}

http:// and https://

Read files from an HTTP(S) endpoint.

import (
	"context"
	"github.com/whosonfirst/go-reader"
	_ "github.com/whosonfirst/go-reader-http"
)

// main creates a reader for an HTTP(S) endpoint.
// It panics if the reader cannot be created.
func main() {
	ctx := context.Background()

	r, err := reader.NewReader(ctx, "https://{HTTP_HOST_AND_PATH}")
	if err != nil {
		panic(err)
	}

	// r is ready to use, for example r.Read(ctx, path). The blank assignment
	// keeps this snippet compilable, since Go rejects unused variables.
	_ = r
}

file://

Read files from a local filesystem.

import (
	"context"
	"github.com/whosonfirst/go-reader"
)

// main creates a reader for a directory on the local filesystem.
// It panics if the reader cannot be created.
func main() {
	ctx := context.Background()

	r, err := reader.NewReader(ctx, "file://{PATH_TO_DIRECTORY}")
	if err != nil {
		panic(err)
	}

	// r is ready to use, for example r.Read(ctx, path). The blank assignment
	// keeps this snippet compilable, since Go rejects unused variables.
	_ = r
}

If you are importing the go-reader-blob package and using Go Cloud's fileblob driver, then instantiating a reader with the file:// scheme will fail because that scheme will already have been registered. You can work around this by using the fs:// scheme. For example:

r, _ := reader.NewReader(ctx, "fs://{PATH_TO_DIRECTORY}")

null://

Pretend to read files.

import (
	"context"
	"github.com/whosonfirst/go-reader"
)

// main creates a "null" reader, which only pretends to read files.
// It panics if the reader cannot be created.
func main() {
	ctx := context.Background()

	r, err := reader.NewReader(ctx, "null://")
	if err != nil {
		panic(err)
	}

	// r is ready to use, for example r.Read(ctx, path). The blank assignment
	// keeps this snippet compilable, since Go rejects unused variables.
	_ = r
}

repo://

This is a convenience scheme for working with Who's On First data repositories.

It will update a URI by appending a data directory to its path and changing its scheme to fs:// before invoking reader.NewReader with the updated URI.

import (
	"context"
	"github.com/whosonfirst/go-reader"
)

// main creates a reader for a Who's On First data repository checked out at
// /usr/local/data/whosonfirst-data-admin-ca, using the repo:// convenience
// scheme. It panics if the reader cannot be created.
func main() {
	ctx := context.Background()

	r, err := reader.NewReader(ctx, "repo:///usr/local/data/whosonfirst-data-admin-ca")
	if err != nil {
		panic(err)
	}

	// r is ready to use, for example r.Read(ctx, path). The blank assignment
	// keeps this snippet compilable, since Go rejects unused variables.
	_ = r
}

stdin://

Read "files" from STDIN.

import (
	"context"
	"github.com/whosonfirst/go-reader"
)

// main creates a reader that reads "files" from STDIN.
// It panics if the reader cannot be created.
func main() {
	ctx := context.Background()

	r, err := reader.NewReader(ctx, "stdin://")
	if err != nil {
		panic(err)
	}

	// r is ready to use, for example r.Read(ctx, "-"). The blank assignment
	// keeps this snippet compilable, since Go rejects unused variables.
	_ = r
}

And then to use, something like:

> cat README.md | ./bin/read -reader-uri stdin:// - | wc -l
     339

Note the use of - for a URI. This is the convention (when reading from STDIN) but it can be whatever you want it to be.

See also

go-reader's People

Contributors

missinglink avatar straup avatar thisisaaronland avatar

Stargazers

 avatar

Watchers

 avatar  avatar  avatar

Forkers

missinglink

go-reader's Issues

Streaming I/O

I was checking out this package today and noticed that it doesn't currently support streaming I/O operations.

The new stdin scheme slurps the whole stream into an in memory buffer and then returns it, rather than returning a reader that can lazily request bytes as required.

I think this is mainly to conform to the ReadSeekCloser interface?

Things like unix pipes/network requests etc. unfortunately can't 'seek', which makes them hard to conform into this interface without buffering or dropping support for seek.

I just wanted to bring this to your attention, if we wanted to support streams larger than memory in the future (such as piping collections of features) then this pattern would be constrained by available memory.

diff --git a/stdin.go b/stdin.go
index 6a07fac..adee46a 100644
--- a/stdin.go
+++ b/stdin.go
@@ -2,11 +2,11 @@ package reader

 import (
        "bufio"
-       "bytes"
        "context"
        "github.com/whosonfirst/go-ioutil"
        "io"
        "os"
 )

 type StdinReader struct {
@@ -30,22 +30,10 @@ func NewStdinReader(ctx context.Context, uri string) (Reader, error) {
 }

 func (r *StdinReader) Read(ctx context.Context, uri string) (io.ReadSeekCloser, error) {
-
-       var b bytes.Buffer
-       wr := bufio.NewWriter(&b)
-
-       _, err := io.Copy(wr, os.Stdin)
-
-       if err != nil {
-               return nil, err
-       }
-
-       wr.Flush()
-
-       br := bytes.NewReader(b.Bytes())
-       return ioutil.NewReadSeekCloser(br)
+       return ioutil.NewReadSeekCloser(os.Stdin)
 }

 func (r *StdinReader) ReaderURI(ctx context.Context, uri string) string {
        return "-"
 }

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, to make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.