Git Product home page Git Product logo

Comments (21)

0xnandhi avatar 0xnandhi commented on May 19, 2024 6

I have been working on a HAR generator for a personal project using chromedp. I am new to both Go and chromedp. This is my in-progress code snippet.

import (
	"container/list"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log"
	"net/url"
	"runtime"
	"strings"
	"sync"
	"time"

	"github.com/chromedp/cdproto/har"
	"github.com/chromedp/cdproto/network"
	"github.com/chromedp/chromedp"
)

type nvPair har.NameValuePair
type hRequest har.Request
type hResponse har.Response

var start = time.Now()

// String function to print the values stored in the
// har request structure.
func (r hRequest) String() string {
	sb := strings.Builder{}
	fmt.Fprintf(&sb, "Method : %s\n", r.Method)
	fmt.Fprintf(&sb, "Url : %s\n", r.URL)
	fmt.Fprintf(&sb, "Headers : \n")
	for _, header := range r.Headers {
		fmt.Fprintf(&sb, "%s: %s\n", header.Name, header.Value)
	}
	fmt.Fprintf(&sb, "Query string : \n")
	for _, qs := range r.QueryString {
		fmt.Fprintf(&sb, "%s: %s\n", qs.Name, qs.Value)
	}
	fmt.Fprintf(&sb, "\n")
	return sb.String()
}

func (r hResponse) String() string {
	sb := strings.Builder{}
	fmt.Fprintf(&sb, "Status : %d\n", r.Status)
	fmt.Fprintf(&sb, "Status Text : %s\n", r.StatusText)
	fmt.Fprintf(&sb, "HTTP Version : %s\n", r.HTTPVersion)
	for _, header := range r.Headers {
		fmt.Fprintf(&sb, "%s: %s\n", header.Name, header.Value)
	}
	return sb.String()
}

// process requests and return a structured data
func processRequest(r *network.EventRequestWillBeSent) *hRequest {
	req := hRequest{}
	// http method
	req.Method = r.Request.Method
	// http request url
	req.URL = r.Request.URL
	// http version.
	req.HTTPVersion = ""
	// Associated headers for the request.
	req.Headers = []*har.NameValuePair{}
	// headers from the *network.EventRequestWillBeSent are in the form,
	// map[key:value]. this needs to be converted to the form of a
	// har.NameValuePair
	for header := range r.Request.Headers {
		h := har.NameValuePair{}
		h.Name = header
		h.Value = r.Request.Headers[header].(string)
		req.Headers = append(req.Headers, &h)
	}
	// Store cookie details.
	req.Cookies = []*har.Cookie{}
	// Url Query stirngs details.
	req.QueryString = []*har.NameValuePair{}
	u, err := url.Parse(req.URL)
	if err != nil {
		log.Printf("[E] Invalid URL data recived : %v", err)
	}
	// Query strings are of the format name = []values when
	// received from the network.EventRequestWillBeSent. This
	// needs to be converted to the form of multiple name, value
	// pairs.
	for name := range u.Query() {
		if len(name) != 0 {
			values := u.Query()[name]
			for _, val := range values {
				req.QueryString = append(req.QueryString, &har.NameValuePair{
					Name:  name,
					Value: val,
				})
			}
		}
	}
	// req.Postdata points to the post data.
	req.PostData = nil
	//if req.Method == "POST" {
	//// Process the post data of the form *har.PostData
	//}
	log.Printf("Post Data : %s", r.Request.PostData)
	// TODO : to implement headersize and bodySize for the request
	req.HeadersSize = 0
	req.BodySize = 0
	return &req
}

func processResponse(r *network.EventResponseReceived) *hResponse {
	res := hResponse{}
	res.Status = r.Response.Status
	res.StatusText = r.Response.StatusText
	res.HTTPVersion = r.Response.Protocol
	// TODO : implement cookie information.
	res.Cookies = nil
	res.Headers = []*har.NameValuePair{}
	// headers from the *network.EventRequestWillBeSent are in the form,
	// map[key:value]. this needs to be converted to the form of a
	// har.NameValuePair
	for header := range r.Response.Headers {
		h := har.NameValuePair{}
		h.Name = header
		h.Value = r.Response.Headers[header].(string)
		res.Headers = append(res.Headers, &h)
	}
	// response content
	res.Content = &har.Content{}
	res.Content.MimeType = r.Response.MimeType
	res.Content.Size = 0

	// Redirect URL
	res.RedirectURL = ""
	res.HeadersSize = 0
	res.BodySize = 0
	return &res
}

//RunHeadlessBrowser executes URL in headless browser
func RunHeadlessBrowser(ctx context.Context, url string) error {

	var ws sync.WaitGroup
	nRequests := list.New()
	nResponses := list.New()

	chromedp.ListenTarget(ctx, func(v interface{}) {
		switch v.(type) {
		case *network.EventRequestWillBeSent:
			ws.Add(1)
			go func(r *network.EventRequestWillBeSent) {
				req := processRequest(r)
				rm := map[network.RequestID]*hRequest{}
				rm[r.RequestID] = req
				nRequests.PushBack(rm)
				ws.Done()
			}(v.(*network.EventRequestWillBeSent))
			break

		case *network.EventResponseReceived:
			ws.Add(1)
			go func(r *network.EventResponseReceived) {
				res := processResponse(r)
				rm := map[network.RequestID]*hResponse{}
				rm[r.RequestID] = res
				nResponses.PushBack(rm)
				ws.Done()
			}(v.(*network.EventResponseReceived))
			break
		case *network.EventDataReceived:
			// Fired when data chunk was received over the network.
			go func() {
				edr := v.(*network.EventDataReceived)
				log.Printf("Data Received : %d\n", edr.DataLength)
			}()
			// case *network.EventLoadingFinished:
			// 	go func() {
			// 		lf := v.(*network.EventLoadingFinished)
			// 		log.Printf("Loading finished : %f\n", lf.EncodedDataLength)
			// 	}()
			// case *network.EventLoadingFailed:
			// 	// Fired when HTTP request has failed to load.
			// 	go func() {
			// 		lf := v.(*network.EventLoadingFailed)
			// 		log.Printf("Loading finished : %s\n", lf.ErrorText)
			// 	}()
		}
	})

	chromedp.Run(ctx, network.Enable())
	err := chromedp.Run(ctx,
		chromedp.Navigate(url),
	)

	if err != nil {
		log.Printf("Failed run. %v\n", err)
		return err
	}

	// Wait for all the go routines to complete
	ws.Wait()

	for e := nRequests.Front(); e != nil; e = e.Next() {
		r := e.Value.(map[network.RequestID]*hRequest)
		for k := range r {
			e, err := json.Marshal(r[k])
			if err != nil {
				log.Printf("Error : %v\n", err)
			}
			log.Printf("\n%s\n", string(e))
		}
	}

	// for e := nResponses.Front(); e != nil; e = e.Next() {
	// 	r := e.Value.(*hResponse)
	// 	log.Printf("%s", *r)
	// }

	return nil
}

Not sure if this is the right way to proceed.

If this is the right way to go, I would be happy to submit the patch as an example. Please let me know.

Thx.

from chromedp.

ZekeLu avatar ZekeLu commented on May 19, 2024 3

Sorry but we are not working on this feature recently. We don't have an ETA now. I will update here if we make any progress.

from chromedp.

kenshaw avatar kenshaw commented on May 19, 2024 1

Note: there are no immediate plans to implement this; however, it is fairly straightforward. Please take a look at this project -- https://github.com/cyrus-and/chrome-har-capturer -- to see how a similar project in the NodeJS world generates / captures HAR files using the Chrome Debugging Protocol.

from chromedp.

derekperkins avatar derekperkins commented on May 19, 2024 1

Here's another similar library: https://github.com/sitespeedio/chrome-har

from chromedp.

rjeczalik avatar rjeczalik commented on May 19, 2024 1

I take back the question, just noticed there's a PR pending for Listen().

from chromedp.

kenshaw avatar kenshaw commented on May 19, 2024 1

I'm about 60% finished with changes that will make capturing output via a HAR file easy. I already pushed the code to generate the HAR file JSON types last weekend, should be finishing this up in the next couple days.

from chromedp.

vbisbest avatar vbisbest commented on May 19, 2024 1

Is there any progress on this? I also notice that there is a devtool call for "getHar()". Is there a way chromedp can wrap this call? https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/devtools/network/getHAR

from chromedp.

kenshaw avatar kenshaw commented on May 19, 2024

I don't believe there is an API for creating a HAR archive in the Chrome Debugging Protocol, so the answer is, "not out of the box." However, it would be fairly easy to create the actual HAR data by monitoring the events coming across the wire.

from chromedp.

rjeczalik avatar rjeczalik commented on May 19, 2024

However, it would be fairly easy to create the actual HAR data by monitoring the events coming across the wire.

@kenshaw How to listen to events, with WaitFrame()? Listen() seems to be a no-op:

https://github.com/knq/chromedp/blob/master/handler.go#L670-L672

from chromedp.

hartfordfive avatar hartfordfive commented on May 19, 2024

Great, i'll definitely give it a try once it's complete.

from chromedp.

alefebvre-ls avatar alefebvre-ls commented on May 19, 2024

@kenshaw Any updates on this feature?

from chromedp.

kenshaw avatar kenshaw commented on May 19, 2024

I've done the initial work of generating the data structures for this, but have not had any time to write the main logic for capturing data and storing it in the actual HAR file.

from chromedp.

hartfordfive avatar hartfordfive commented on May 19, 2024

Could you potentially point me in the direction of the code you have created so far?

Thanks again

from chromedp.

kenshaw avatar kenshaw commented on May 19, 2024

I created a har.json protocol definition, that defines all of the types in chromedp/cdp/har package. One should be able to populate the data types there and write the files to disk. It would mean capturing certain events being sent by the browser.

from chromedp.

eelcocramer avatar eelcocramer commented on May 19, 2024

Are there examples of capturing and storing data in a har with chromedp?

from chromedp.

s0verbose avatar s0verbose commented on May 19, 2024

Is anyone actively working on this atm? And/or is there a different case of capturing request/response data, that the HAR case could be modeled after?

from chromedp.

kenshaw avatar kenshaw commented on May 19, 2024

Yes, this is being worked on.

from chromedp.

mvdan avatar mvdan commented on May 19, 2024

Please see #370. That issue isn't about HAR, but it's probably one level underneath in terms of functionality.

from chromedp.

braindeaf avatar braindeaf commented on May 19, 2024

I guess this didn't happen yet?

from chromedp.

ZekeLu avatar ZekeLu commented on May 19, 2024

Not yet.

from chromedp.

rickywei avatar rickywei commented on May 19, 2024

any progress now?

from chromedp.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.