Git Product home page Git Product logo

udger-nodejs's People

Contributors

adriaandotcom avatar eviltik avatar mallat avatar

Stargazers

 avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar

Watchers

 avatar  avatar  avatar  avatar  avatar

udger-nodejs's Issues

publish module

  • bump version 0.9.0 (just in case of bugs between now and 1.0.0 release)
  • then npm publish

After publish we can
npm install udger-nodejs

We have to wait 24 hours before seeing the module in npmjs.org

JSON Format, as an option, should dump full data, or compact data

example: transform the original format

{
    "user_agent": {
        "ua_string": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
        "ua_class": "Browser",
        "ua_class_code": "browser",
        "ua": "Chrome 62.0.3202.94",
        "ua_version": "62.0.3202.94",
        "ua_version_major": "62",
        "ua_uptodate_current_version": "62",
        "ua_family": "Chrome",
        "ua_family_code": "chrome",
        "ua_family_homepage": "http://www.google.com/chrome/",
        "ua_family_vendor": "Google Inc.",
        "ua_family_vendor_code": "google_inc",
        "ua_family_vendor_homepage": "https://www.google.com/about/company/",
        "ua_family_icon": "chrome.png",
        "ua_family_icon_big": "chrome_big.png",
        "ua_family_info_url": "https://udger.com/resources/ua-list/browser-detail?browser=Chrome",
        "ua_engine": "WebKit/Blink",
        "os": "Windows 10",
        "os_code": "windows_10",
        "os_homepage": "https://en.wikipedia.org/wiki/Windows_10",
        "os_icon": "windows10.png",
        "os_icon_big": "windows10_big.png",
        "os_info_url": "https://udger.com/resources/ua-list/os-detail?os=Windows 10",
        "os_family": "Windows",
        "os_family_code": "windows",
        "os_family_vendor": "Microsoft Corporation.",
        "os_family_vendor_code": "microsoft_corporation",
        "os_family_vendor_homepage": "https://www.microsoft.com/about/",
        "device_class": "Desktop",
        "device_class_code": "desktop",
        "device_class_icon": "desktop.png",
        "device_class_icon_big": "desktop_big.png",
        "device_class_info_url": "https://udger.com/resources/ua-list/device-detail?device=Desktop",
        "device_marketname": "",
        "device_brand": "",
        "device_brand_code": "",
        "device_brand_homepage": "",
        "device_brand_icon": "",
        "device_brand_icon_big": "",
        "device_brand_info_url": "",
        "crawler_last_seen": "",
        "crawler_category": "",
        "crawler_category_code": "",
        "crawler_respect_robotstxt": ""
    },
    "ip_address": {
        "ip": "2a02:598:7000:116:0:0:0:101",
        "ip_ver": 6,
        "ip_classification": "Crawler",
        "ip_classification_code": "crawler",
        "ip_hostname": "",
        "ip_last_seen": "2016-02-12 04:28:56",
        "ip_country": "Czech Republic",
        "ip_country_code": "CZ",
        "ip_city": "Prague",
        "crawler_name": "SeznamBot/3.2-test1",
        "crawler_ver": "3.2-test1",
        "crawler_ver_major": "3",
        "crawler_family": "SeznamBot",
        "crawler_family_code": "seznambot",
        "crawler_family_homepage": "http://napoveda.seznam.cz/en/seznambot-intro/",
        "crawler_family_vendor": "Seznam.cz, a.s.",
        "crawler_family_vendor_code": "seznam-cz_as",
        "crawler_family_vendor_homepage": "http://onas.seznam.cz/",
        "crawler_family_icon": "seznam.png",
        "crawler_family_info_url": "https://udger.com/resources/ua-list/bot-detail?bot=SeznamBot#id12590",
        "crawler_last_seen": "2016-08-31 15:19:38",
        "crawler_category": "Search engine bot",
        "crawler_category_code": "search_engine_bot",
        "crawler_respect_robotstxt": "yes",
        "datacenter_name": "Seznam.cz",
        "datacenter_name_code": "seznam_cz",
        "datacenter_homepage": "http://onas.seznam.cz/"
    },
    "from_cache": false
}

into

{
	"userAgent": {
		"ua": {
			"name": "Chrome 62.0.3202.94",
			"string": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
			"engine": "WebKit/Blink",
			"class": {
				"name": "Browser",
				"code": "browser"
			},
			"version": {
				"current": "62.0.3202.94",
				"major": "62",
				"uptodate_current_version": "62"
			},
			"family": {
				"name": "Chrome",
				"code": "chrome",
				"homepage": "http://www.google.com/chrome/",
				"vendor": {
					"name": "Google Inc.",
					"code": "google_inc",
					"homepage": "https://www.google.com/about/company/",
					"icon": "chrome.png",
					"icon_big": "chrome_big.png",
					"info_url": "https://udger.com/resources/ua-list/browser-detail?browser=Chrome"
				}
			}
		},
		"os": {
			"name": "Windows 10",
			"code": "windows_10",
			"homepage": "https://en.wikipedia.org/wiki/Windows_10",
			"icon": "windows10.png",
			"icon_big": "windows10_big.png",
			"info_url": "https://udger.com/resources/ua-list/os-detail?os=Windows 10",
			"family": {
				"name": "Windows",
				"code": "windows",
				"vendor": {
					"name": "Microsoft Corporation.",
					"code": "microsoft_corporation",
					"homepage": "https://www.microsoft.com/about/"
				}
			}
		},
		"device": {
			"marketname": "",
			"class": {
				"name": "Desktop",
				"code": "desktop",
				"icon": "desktop.png",
				"icon_big": "desktop_big.png",
				"info_url": "https://udger.com/resources/ua-list/device-detail?device=Desktop"
			},
			"brand": {
				"name": "",
				"code": "",
				"homepage": "",
				"icon": "",
				"icon_big": "",
				"info_url": ""
			}
		},
		"crawler": {
			"last_seen": "",
			"respect_robotstxt": "",
			"category": {
				"name": "",
				"code": ""
			}
		}
	},
	"ipAddress": {
		"ip": "2a02:598:7000:116:0:0:0:101",
		"ver": 6,
		"hostname": "",
		"last_seen": "2016-02-12 04:28:56",
		"classification": {
			"name": "Crawler",
			"code": "crawler"
		},
		"geoip": {
			"country": "Czech Republic",
			"code": "CZ",
			"city": "Prague"
		},
		"crawler": {
			"name": "SeznamBot/3.2-test1",
			"ver": "3.2-test1",
			"ver_major": "3",
			"last_seen": "2016-08-31 15:19:38",
			"respect_robotstxt": "yes",
			"family": {
				"name": "SeznamBot",
				"code": "seznambot",
				"homepage": "http://napoveda.seznam.cz/en/seznambot-intro/",
				"icon": "seznam.png",
				"info_url": "https://udger.com/resources/ua-list/bot-detail?bot=SeznamBot#id12590",
				"vendor": {
					"name": "Seznam.cz, a.s.",
					"code": "seznam-cz_as",
					"homepage": "http://onas.seznam.cz/"
				}
			},
			"category": {
				"name": "Search engine bot",
				"code": "search_engine_bot"
			}
		},
		"datacenter": {
			"name": "Seznam.cz",
			"code": "seznam_cz",
			"homepage": "http://onas.seznam.cz/"
		}
	},
	"from_cache": false
}

so we can simply access result.ipAddress.geoip.country

implement getUACrawlersFamilies helper

getUACrawlersFamilies() should call back with this:

[
    {
        family: 'Googlebot',
        family_code: 'googlebot',
        crawler_classification: 'Search engine bot',
        crawler_classification_code: 'search_engine_bot'
    },
  ....
]

update better-sqlite3 package version

Can you please update the better-sqlite3 dependency to version v6.0.1
This version of better-sqlite3 (v6.0.1) uses prebuilt binaries and resolves many problems with npm install

refactors

move

  • phpRegexpToJs
  • getIpVersion
  • inet_pton => inetPton
  • inet_ntop => inetNtop

into utils.js and refactor index.js to use utils.js

Parser is very slow

It can take the parser 300-600 to analyze the user agent if it's not cached yet.
Is there a way to cache the db requests that can be cached, or somehow make it faster?

refactor helpers

  • getIpsClassification => getIPsClassification
  • getClientsClassification => getUAClientsClassification
  • getCrawlersClassification => getUACrawlersClassification
  • randomIpv4 => randomIPv4
  • randomClients => randomUAClients
  • randomClientsRegex => randomUAClientsRegex
  • randomCrawlers => randomUACrawlers

typo in README

  • getClientsClassification should be getUAClientsClassification
  • getCrawlersClassification should be getUACrawlersClassification

update packages

Can you please update Lodash to the latest version? There is a vulnerable module, lodash.merge, in the current version.

implement helpers

  • getClientsClassification

  • getCrawlersClassification

  • getIpsClassification

  • randomClient

  • randomClientsRegex

  • randomCrawlers

  • randomIpv4

LRU Cache - Misleading documentation

According to the documentation, in order to start using the LRU cache, the following line should be added:
udgerParser.setCacheEnable()
While debugging it, I saw that it affects neither the response nor the response time, and I discovered that the following line should be added instead:
udgerParser.setCacheEnable(true)

Digging into the source code supports my findings:
/** * Activate cache * @param {Boolean} cache - true or false */ setCacheEnable(cache) { this.cacheEnable = cache; }

Update better-sqlite3 to version 8.0.1

udger-nodejs cannot be installed on Node.js 16+ because the integer library fails to install. This library is a dependency of better-sqlite3 < 8. better-sqlite3 8 removed the dependency on integer, so updating solves the issue.

Typescript support

Since the UdgerParser class itself is not exposed, only a default function that instantiates it, there's no way to reference it in a TypeScript project and take advantage of static analysis.
For example:

class MyClass {
  constructor(private _uaParser: UdgerParser) {}
  
  getUserAgentInfo(req: Request) {
    const ua = req.headers['user-agent']
    return this._uaParser.parseUa(ua);
  }
}

Can you please expose the class (and maybe add correct types as well?)

family are not unique, getUACrawlersFamilies should only use *_code

Change SQL Request

SELECT DISTINCT 
family, family_code
udger_crawler_class.crawler_classification
udger_crawler_class.crawler_classification_code
FROM udger_crawler_list
LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id

with

SELECT DISTINCT 
udger_crawler_list.family_code,
udger_crawler_class.crawler_classification_code 
FROM udger_crawler_list 
LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id
WHERE family_code != ""
ORDER BY family_code, crawler_classification_code

update README.md

  • update readme so it's like all other udger bindings
  • update readme with a connect/express example

update packages

 better-sqlite3   4.0.3  →   4.1.0
 fs-extra         4.0.2  →   6.0.0
 ip-address       5.8.8  →   5.8.9
 connect          3.6.5  →   3.6.6
 merge-deep       3.0.0  →   3.0.1
 tap             11.0.0  →  11.1.4

set/parse race conditions

It would be better if the IP and UA could be passed directly to the parse() method rather than having to use set() before.

let ret = udgerParser.parse({
    ua:'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
    ip:'2A02:598:7000:116:0:0:0:101'
});

The parser should be stateless to avoid race conditions. For example, two requests to our service, which access the global Udger instance from inside an async function, could happen at the same time, leading to a set() -> set() -> parse() -> parse() sequence where the second request overwrites the IP and UA of the first request before the first request gets to the parse() method call.

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.