udger / udger-nodejs Goto Github PK

View Code? Open in Web Editor NEW

12.0 5.0 7.0 382 KB

Node.js agent string parser based on Udger https://udger.com/products/local_parser

License: MIT License

JavaScript 100.00%

bot-detection device-detector mobile-detection user-agent-parser

udger-nodejs's People

Contributors

Stargazers

Watchers

Forkers

jesseskinner henningborchers juneapp avrahamo adriaandotcom buildremote seanpm2001

udger-nodejs's Issues

bump version 1.2.1 and publish npm

publish module

bump version 0.9.0 (just in case of bugs between now and 1.0.0 release)
then npm publish

After publish we can
npm install udger-nodejs

We have to wait 24 hours before seeing the module in npmjs.org

crawler should return unrecognized device class

Actually, it returns an empty device class; bug spotted.

implement in-memory caching

Keep ip/ua/sqlite results in memory, with a default limit of 1000.

JSON Format, as an option, should dump full data, or compact data

example: transform the original format

{
    "user_agent": {
        "ua_string": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
        "ua_class": "Browser",
        "ua_class_code": "browser",
        "ua": "Chrome 62.0.3202.94",
        "ua_version": "62.0.3202.94",
        "ua_version_major": "62",
        "ua_uptodate_current_version": "62",
        "ua_family": "Chrome",
        "ua_family_code": "chrome",
        "ua_family_homepage": "http://www.google.com/chrome/",
        "ua_family_vendor": "Google Inc.",
        "ua_family_vendor_code": "google_inc",
        "ua_family_vendor_homepage": "https://www.google.com/about/company/",
        "ua_family_icon": "chrome.png",
        "ua_family_icon_big": "chrome_big.png",
        "ua_family_info_url": "https://udger.com/resources/ua-list/browser-detail?browser=Chrome",
        "ua_engine": "WebKit/Blink",
        "os": "Windows 10",
        "os_code": "windows_10",
        "os_homepage": "https://en.wikipedia.org/wiki/Windows_10",
        "os_icon": "windows10.png",
        "os_icon_big": "windows10_big.png",
        "os_info_url": "https://udger.com/resources/ua-list/os-detail?os=Windows 10",
        "os_family": "Windows",
        "os_family_code": "windows",
        "os_family_vendor": "Microsoft Corporation.",
        "os_family_vendor_code": "microsoft_corporation",
        "os_family_vendor_homepage": "https://www.microsoft.com/about/",
        "device_class": "Desktop",
        "device_class_code": "desktop",
        "device_class_icon": "desktop.png",
        "device_class_icon_big": "desktop_big.png",
        "device_class_info_url": "https://udger.com/resources/ua-list/device-detail?device=Desktop",
        "device_marketname": "",
        "device_brand": "",
        "device_brand_code": "",
        "device_brand_homepage": "",
        "device_brand_icon": "",
        "device_brand_icon_big": "",
        "device_brand_info_url": "",
        "crawler_last_seen": "",
        "crawler_category": "",
        "crawler_category_code": "",
        "crawler_respect_robotstxt": ""
    },
    "ip_address": {
        "ip": "2a02:598:7000:116:0:0:0:101",
        "ip_ver": 6,
        "ip_classification": "Crawler",
        "ip_classification_code": "crawler",
        "ip_hostname": "",
        "ip_last_seen": "2016-02-12 04:28:56",
        "ip_country": "Czech Republic",
        "ip_country_code": "CZ",
        "ip_city": "Prague",
        "crawler_name": "SeznamBot/3.2-test1",
        "crawler_ver": "3.2-test1",
        "crawler_ver_major": "3",
        "crawler_family": "SeznamBot",
        "crawler_family_code": "seznambot",
        "crawler_family_homepage": "http://napoveda.seznam.cz/en/seznambot-intro/",
        "crawler_family_vendor": "Seznam.cz, a.s.",
        "crawler_family_vendor_code": "seznam-cz_as",
        "crawler_family_vendor_homepage": "http://onas.seznam.cz/",
        "crawler_family_icon": "seznam.png",
        "crawler_family_info_url": "https://udger.com/resources/ua-list/bot-detail?bot=SeznamBot#id12590",
        "crawler_last_seen": "2016-08-31 15:19:38",
        "crawler_category": "Search engine bot",
        "crawler_category_code": "search_engine_bot",
        "crawler_respect_robotstxt": "yes",
        "datacenter_name": "Seznam.cz",
        "datacenter_name_code": "seznam_cz",
        "datacenter_homepage": "http://onas.seznam.cz/"
    },
    "from_cache": false
}

into

{
	"userAgent": {
		"ua": {
			"name": "Chrome 62.0.3202.94",
			"string": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
			"engine": "WebKit/Blink",
			"class": {
				"name": "Browser",
				"code": "browser"
			},
			"version": {
				"current": "62.0.3202.94",
				"major": "62",
				"uptodate_current_version": "62"
			},
			"family": {
				"name": "Chrome",
				"code": "chrome",
				"homepage": "http://www.google.com/chrome/",
				"icon": "chrome.png",
				"icon_big": "chrome_big.png",
				"info_url": "https://udger.com/resources/ua-list/browser-detail?browser=Chrome",
				"vendor": {
					"name": "Google Inc.",
					"code": "google_inc",
					"homepage": "https://www.google.com/about/company/"
				}
			}
		},
		"os": {
			"name": "Windows 10",
			"code": "windows_10",
			"homepage": "https://en.wikipedia.org/wiki/Windows_10",
			"icon": "windows10.png",
			"icon_big": "windows10_big.png",
			"info_url": "https://udger.com/resources/ua-list/os-detail?os=Windows 10",
			"family": {
				"name": "Windows",
				"code": "windows",
				"vendor": {
					"name": "Microsoft Corporation.",
					"code": "microsoft_corporation",
					"homepage": "https://www.microsoft.com/about/"
				}
			}
		},
		"device": {
			"marketname": "",
			"class": {
				"name": "Desktop",
				"code": "desktop",
				"icon": "desktop.png",
				"icon_big": "desktop_big.png",
				"info_url": "https://udger.com/resources/ua-list/device-detail?device=Desktop"
			},
			"brand": {
				"name": "",
				"code": "",
				"homepage": "",
				"icon": "",
				"icon_big": "",
				"info_url": ""
			}
		},
		"crawler": {
			"last_seen": "",
			"respect_robotstxt": "",
			"category": {
				"name": "",
				"code": ""
			}
		}
	},
	"ipAddress": {
		"ip": "2a02:598:7000:116:0:0:0:101",
		"ver": 6,
		"hostname": "",
		"last_seen": "2016-02-12 04:28:56",
		"classification": {
			"name": "Crawler",
			"code": "crawler"
		},
		"geoip": {
			"country": "Czech Republic",
			"code": "CZ",
			"city": "Prague"
		},
		"crawler": {
			"name": "SeznamBot/3.2-test1",
			"ver": "3.2-test1",
			"ver_major": "3",
			"last_seen": "2016-08-31 15:19:38",
			"respect_robotstxt": "yes",
			"family": {
				"name": "SeznamBot",
				"code": "seznambot",
				"homepage": "http://napoveda.seznam.cz/en/seznambot-intro/",
				"icon": "seznam.png",
				"info_url": "https://udger.com/resources/ua-list/bot-detail?bot=SeznamBot#id12590",
				"vendor": {
					"name": "Seznam.cz, a.s.",
					"code": "seznam-cz_as",
					"homepage": "http://onas.seznam.cz/"
				}
			},
			"category": {
				"name": "Search engine bot",
				"code": "search_engine_bot"
			}
		},
		"datacenter": {
			"name": "Seznam.cz",
			"code": "seznam_cz",
			"homepage": "http://onas.seznam.cz/"
		}
	},
	"from_cache": false
}

so we can simply access result.ipAddress.geoip.country

ipv4 ip2long result is buggy

For some IPv4 addresses, the ip2long result is wrong, so the IP is not found in the database.

implement getUACrawlersFamilies helper

getUACrawlersFamilies() should pass the following to its callback:

[
    {
        family: 'Googlebot',
        family_code: 'googlebot',
        crawler_classification: 'Search engine bot',
        crawler_classification_code: 'search_engine_bot'
    },
  ....
]

update better-sqlite3 package version

Can you please update the better-sqlite3 dependency to version v6.0.1
This version of better-sqlite3 (v6.0.1) uses prebuilt binaries and resolves many problems with npm install

refactors

move

phpRegexpToJs
getIpVersion
inet_pton => inetPton
inet_ntop => inetNtop

into utils.js and refactor index.js to use utils.js

Start following SemVer properly

Add CHANGELOG.md
Make branch after v1.0.0 released

Parser is very slow

It can take the parser 300-600 to analyze the user agent if it's not cached yet.
Is there a way to cache the db requests that can be cached, or somehow make it faster?

bad path for defaultResult.json when using npm module

defaultResult.json is loaded with path "./"

Should be __dirname+'/defaultResult.json'

finish tests using test db

Find all useful functional tests and implement them.

Update Packages

Can you please update node-gyp to the latest version? The current version is vulnerable:

https://www.npmjs.com/advisories/803

implement helper getDatabaseInfo()

should return udger_db_info single record

when sqlite3 value result is null, convert it to empty string ('')

Keeping a null value makes the tests fail.

refactor helpers

getIpsClassification => getIPsClassification
getClientsClassification => getUAClientsClassification
getCrawlersClassification => getUACrawlersClassification
randomIpv4 => randomIPv4
randomClients => randomUAClients
randomClientsRegex => randomUAClientsRegex
randomCrawlers => randomUACrawlers

TypeError: Cannot read property 'toLowerCase' of undefined

The parser sometimes returns this error.

helpers are using callback, README.md is wrong

fix README
bump 1.3.1
npm publish

take default ret from defaultResult.json

avoid duplicate

typo in README

getClientsClassification should be getUAClientsClassification
getCrawlersClassification should be getUACrawlersClassification

update packages

Can you please update Lodash to the latest version? There is a vulnerable module (lodash.merge) in the current version.

datacenter & ipv6

This part is buggy at the moment, must investigate and fix.

implement helpers

getClientsClassification
getCrawlersClassification
getIpsClassification
randomClient
randomClientsRegex
randomCrawlers
randomIpv4

LRU Cache - Misleading documentation

According to the documentation, in order to start using the LRU cache, the following line should be added:
udgerParser.setCacheEnable()
While debugging, I saw that it affects neither the response nor the response time, and I discovered that the following line should be added instead:
udgerParser.setCacheEnable(true)

Digging into the source code supports my findings:
/**
 * Activate cache
 * @param {Boolean} cache - true or false
 */
setCacheEnable(cache) {
    this.cacheEnable = cache;
}

Update better-sqlite3 to version 8.0.1

Cannot install udger-nodejs in nodejs 16+ due to integer library that is failing to install. This library is a dependency of better-sqlite3 < 8. When updating to better-sqlite3 they removed the dependency on integer so this solves the issue.

change licence

go to MIT

Typescript support

Since the UdgerParser class is not exposed, but only a default function that instantiates the class, there's no way to reference it in a TypeScript project and take advantage of static analysis.
For example:

class MyClass {
  constructor(private _uaParser: UdgerParser) {}
  
  getUserAgentInfo(req: Request) {
    const ua = req.headers['user-agent']
    return this._uaParser.parseUa(ua);
  }
}

Can you please expose the class (and maybe add correct types as well?)

bump 1.3.5

Family names are not unique, so getUACrawlersFamilies should only use the *_code columns.

Change SQL Request

SELECT DISTINCT 
family, family_code,
udger_crawler_class.crawler_classification,
udger_crawler_class.crawler_classification_code
FROM udger_crawler_list
LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id

with

SELECT DISTINCT 
udger_crawler_list.family_code,
udger_crawler_class.crawler_classification_code 
FROM udger_crawler_list 
LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id
WHERE family_code != ""
ORDER BY family_code, crawler_classification_code

ip classification: 66.249.64.73 is not recognized as expected

Actual: unrecognized
Expected: Crawler

replace setIP(ip) and setUA(ua) with set({ip:'foo', 'ua':'bar'})

To avoid ip/ua reinit

helpers: randomUA

we probably can look for malicious UAs in the database.

update README.md

update readme so it's like all other udger bindings
update readme with a connect/express example

Missing features for udger database update

need udger.disconnect() method to avoid read/write conflict when copying new udger sqlite db
need udger.connect() to reconnect
need udger.cacheClean()

helpers: isComingFromDatacenter, isFakeCrawler

It would be great to have helpers like this:

udger.IP.isComingFromDatacenter(); // return true or false
udger.IP.isFakeCrawler(); // return true or false

bump 1.3.3 and publish

update packages

 better-sqlite3   4.0.3  →   4.1.0
 fs-extra         4.0.2  →   6.0.0
 ip-address       5.8.8  →   5.8.9
 connect          3.6.5  →   3.6.6
 merge-deep       3.0.0  →   3.0.1
 tap             11.0.0  →  11.1.4

set/parse race conditions

It would be better if the IP and UA could be passed directly to the parse() method rather than having to use set() before.

let ret = udgerParser.parse({
    ua:'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
    ip:'2A02:598:7000:116:0:0:0:101'
});

The parser should be stateless to avoid problems in race conditions. For example it could happen that two requests to our service which accesses the global Udger instance from inside an async function happen at the same time, leading to a set() -> set() -> parse() -> parse() sequence where the second request overwrites the IP and UA of the first request before the first request gets to the parse() method call