udger / udger-nodejs Goto Github PK
View Code? Open in Web Editor NEW
Node.js agent string parser based on Udger https://udger.com/products/local_parser
License: MIT License
Node.js agent string parser based on Udger https://udger.com/products/local_parser
License: MIT License
npms
After publish we can
npm install udger-nodejs
We have to wait 24 hours before seeing the module in npmjs.org
Actually, it returns an empty device class; bug spotted.
Keep ip/ua/sqlite results in memory, with a default limit of 1000.
example: transform the original format
{
"user_agent": {
"ua_string": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
"ua_class": "Browser",
"ua_class_code": "browser",
"ua": "Chrome 62.0.3202.94",
"ua_version": "62.0.3202.94",
"ua_version_major": "62",
"ua_uptodate_current_version": "62",
"ua_family": "Chrome",
"ua_family_code": "chrome",
"ua_family_homepage": "http://www.google.com/chrome/",
"ua_family_vendor": "Google Inc.",
"ua_family_vendor_code": "google_inc",
"ua_family_vendor_homepage": "https://www.google.com/about/company/",
"ua_family_icon": "chrome.png",
"ua_family_icon_big": "chrome_big.png",
"ua_family_info_url": "https://udger.com/resources/ua-list/browser-detail?browser=Chrome",
"ua_engine": "WebKit/Blink",
"os": "Windows 10",
"os_code": "windows_10",
"os_homepage": "https://en.wikipedia.org/wiki/Windows_10",
"os_icon": "windows10.png",
"os_icon_big": "windows10_big.png",
"os_info_url": "https://udger.com/resources/ua-list/os-detail?os=Windows 10",
"os_family": "Windows",
"os_family_code": "windows",
"os_family_vendor": "Microsoft Corporation.",
"os_family_vendor_code": "microsoft_corporation",
"os_family_vendor_homepage": "https://www.microsoft.com/about/",
"device_class": "Desktop",
"device_class_code": "desktop",
"device_class_icon": "desktop.png",
"device_class_icon_big": "desktop_big.png",
"device_class_info_url": "https://udger.com/resources/ua-list/device-detail?device=Desktop",
"device_marketname": "",
"device_brand": "",
"device_brand_code": "",
"device_brand_homepage": "",
"device_brand_icon": "",
"device_brand_icon_big": "",
"device_brand_info_url": "",
"crawler_last_seen": "",
"crawler_category": "",
"crawler_category_code": "",
"crawler_respect_robotstxt": ""
},
"ip_address": {
"ip": "2a02:598:7000:116:0:0:0:101",
"ip_ver": 6,
"ip_classification": "Crawler",
"ip_classification_code": "crawler",
"ip_hostname": "",
"ip_last_seen": "2016-02-12 04:28:56",
"ip_country": "Czech Republic",
"ip_country_code": "CZ",
"ip_city": "Prague",
"crawler_name": "SeznamBot/3.2-test1",
"crawler_ver": "3.2-test1",
"crawler_ver_major": "3",
"crawler_family": "SeznamBot",
"crawler_family_code": "seznambot",
"crawler_family_homepage": "http://napoveda.seznam.cz/en/seznambot-intro/",
"crawler_family_vendor": "Seznam.cz, a.s.",
"crawler_family_vendor_code": "seznam-cz_as",
"crawler_family_vendor_homepage": "http://onas.seznam.cz/",
"crawler_family_icon": "seznam.png",
"crawler_family_info_url": "https://udger.com/resources/ua-list/bot-detail?bot=SeznamBot#id12590",
"crawler_last_seen": "2016-08-31 15:19:38",
"crawler_category": "Search engine bot",
"crawler_category_code": "search_engine_bot",
"crawler_respect_robotstxt": "yes",
"datacenter_name": "Seznam.cz",
"datacenter_name_code": "seznam_cz",
"datacenter_homepage": "http://onas.seznam.cz/"
},
"from_cache": false
}
into
{
"userAgent": {
"ua": {
"name": "Chrome 62.0.3202.94",
"string": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
"engine": "WebKit/Blink",
"class": {
"name": "Browser",
"code": "browser"
},
"version": {
"current": "62.0.3202.94",
"major": "62",
"uptodate_current_version": "62"
},
"family": {
"name": "Chrome",
"code": "chrome",
"homepage": "http://www.google.com/chrome/",
"vendor": {
"name": "Google Inc.",
"code": "google_inc",
"homepage": "https://www.google.com/about/company/",
"icon": "chrome.png",
"icon_big": "chrome_big.png",
"info_url": "https://udger.com/resources/ua-list/browser-detail?browser=Chrome"
}
}
},
"os": {
"name": "Windows 10",
"code": "windows_10",
"homepage": "https://en.wikipedia.org/wiki/Windows_10",
"icon": "windows10.png",
"icon_big": "windows10_big.png",
"info_url": "https://udger.com/resources/ua-list/os-detail?os=Windows 10",
"family": {
"name": "Windows",
"code": "windows",
"vendor": {
"name": "Microsoft Corporation.",
"code": "microsoft_corporation",
"homepage": "https://www.microsoft.com/about/"
}
}
},
"device": {
"marketname": "",
"class": {
"name": "Desktop",
"code": "desktop",
"icon": "desktop.png",
"icon_big": "desktop_big.png",
"info_url": "https://udger.com/resources/ua-list/device-detail?device=Desktop"
},
"brand": {
"name": "",
"code": "",
"homepage": "",
"icon": "",
"icon_big": "",
"info_url": ""
}
},
"crawler": {
"last_seen": "",
"respect_robotstxt": "",
"category": {
"name": "",
"code": ""
}
}
},
"ipAddress": {
"ip": "2a02:598:7000:116:0:0:0:101",
"ver": 6,
"hostname": "",
"last_seen": "2016-02-12 04:28:56",
"classification": {
"name": "Crawler",
"code": "crawler"
},
"geoip": {
"country": "Czech Republic",
"code": "CZ",
"city": "Prague"
},
"crawler": {
"name": "SeznamBot/3.2-test1",
"ver": "3.2-test1",
"ver_major": "3",
"last_seen": "2016-08-31 15:19:38",
"respect_robotstxt": "yes",
"family": {
"name": "SeznamBot",
"code": "seznambot",
"homepage": "http://napoveda.seznam.cz/en/seznambot-intro/",
"icon": "seznam.png",
"info_url": "https://udger.com/resources/ua-list/bot-detail?bot=SeznamBot#id12590",
"vendor": {
"name": "Seznam.cz, a.s.",
"code": "seznam-cz_as",
"homepage": "http://onas.seznam.cz/"
}
},
"category": {
"name": "Search engine bot",
"code": "search_engine_bot"
}
},
"datacenter": {
"name": "Seznam.cz",
"code": "seznam_cz",
"homepage": "http://onas.seznam.cz/"
}
},
"from_cache": false
}
so we can simply access result.ipAddress.geoip.country
For some IPv4 addresses the ip2long result is wrong, so the IP is not found in the database.
getUACrawlersFamilies() should call back with this:
[
{
family: 'Googlebot',
family_code: 'googlebot',
crawler_classification: 'Search engine bot',
crawler_classification_code: 'search_engine_bot'
},
....
]
Can you please update the better-sqlite3 dependency to version v6.0.1
This version of better-sqlite3 (v6.0.1) uses prebuilt binaries and resolves many problems with npm install
move
into utils.js and refactor index.js to use utils.js
It can take the parser 300-600 to analyze the user agent if it's not cached yet.
Is there a way to cache the db requests that can be cached, or somehow make it faster?
defaultResult.json is loaded with path "./"
Should be __dirname+'/defaultResult.json'
Find all useful functional tests and implement them.
Can you please update node-gyp to the latest version? The current version is vulnerable.
npm publish
should return udger_db_info single record
Staying with a null value makes the tests fail.
Sometimes return this error
avoid duplicate
Can you please update Lodash to the latest version? There is a vulnerable module, lodash.merge, in the current version.
This part is buggy at the moment, must investigate and fix.
getClientsClassification
getCrawlersClassification
getIpsClassification
randomClient
randomClientsRegex
randomCrawlers
randomIpv4
According to the documentation, in order to start using the LRU cache, the following line should be added:
udgerParser.setCacheEnable()
While debugging it, I saw that it affects neither the response nor the response time, and I discovered that the following line should be added instead:
udgerParser.setCacheEnable(true)
Digging into the source code supports my findings:
/** * Activate cache * @param {Boolean} cache - true or false */ setCacheEnable(cache) { this.cacheEnable = cache; }
Cannot install udger-nodejs on Node.js 16+ because the integer library fails to install. This library is a dependency of better-sqlite3 < 8. Newer versions of better-sqlite3 removed the dependency on integer, so updating better-sqlite3 solves the issue.
go to MIT
Since the UdgerParser class is not exposed, but only a default function that instantiates the class, there's no way to reference it in a TypeScript project and take advantage of static analysis.
For example:
class MyClass {
constructor(private _uaParser: UdgerParser) {}
getUserAgentInfo(req: Request) {
const ua = req.headers['user-agent']
return this._uaParser.parseUa(ua);
}
}
Can you please expose the class (and maybe add correct types as well?)
Change SQL Request
SELECT DISTINCT
family, family_code
udger_crawler_class.crawler_classification
udger_crawler_class.crawler_classification_code
FROM udger_crawler_list
LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id
with
SELECT DISTINCT
udger_crawler_list.family_code,
udger_crawler_class.crawler_classification_code
FROM udger_crawler_list
LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id
WHERE family_code != ""
ORDER BY family_code, crawler_classification_code
Actual: unrecognized
Expected: Crawler
To avoid ip/ua reinit
we probably can look for malicious UAs in the database.
It would be great to have helpers like this:
udger.IP.isComingFromDatacenter(); // return true or false
udger.IP.isFakeCrawler(); // return true or false
better-sqlite3 4.0.3 → 4.1.0
fs-extra 4.0.2 → 6.0.0
ip-address 5.8.8 → 5.8.9
connect 3.6.5 → 3.6.6
merge-deep 3.0.0 → 3.0.1
tap 11.0.0 → 11.1.4
It would be better if the IP and UA could be passed directly to the parse() method rather than having to call set() beforehand.
let ret = udgerParser.parse({
ua:'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
ip:'2A02:598:7000:116:0:0:0:101'
});
The parser should be stateless to avoid race conditions. For example, two requests to our service, each accessing the global Udger instance from inside an async function, could happen at the same time, leading to a set() -> set() -> parse() -> parse() sequence where the second request overwrites the IP and UA of the first request before the first request gets to the parse() method call.
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.