Comments (6)
The Xubuntu task manager shows this process using ~1.5 GB of memory.
When running multi-core, each process was using ~1 GB.
from audfprint.
When I use a different database:
Read fprints for 641 files ( 58808989 hashes) from /afp/songs.db
the task manager shows the process using ~2.8 GB of RAM.
--> Is the database density too high?
from audfprint.
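For scale, here is a back-of-envelope estimate of what a database like that occupies in memory. It assumes audfprint's hash table is a dense 2**hashbits x depth array of uint32 entries, with hashbits=20 and depth=100; those are assumed library defaults, not values reported in this thread:

import numpy as np

# Assumed audfprint defaults (hashbits/depth are not confirmed in this thread).
hashbits, depth = 20, 100
n_hashes = 58808989  # reported above when loading /afp/songs.db

table_gb = (2 ** hashbits) * depth * np.dtype(np.uint32).itemsize / 2.0 ** 30
hashes_gb = n_hashes * 4 / 2.0 ** 30  # 4 bytes per stored hash, a lower bound

print('dense table: ~%.2f GB' % table_gb)   # ~0.39 GB for the table alone
print('raw hashes:  ~%.2f GB' % hashes_gb)  # ~0.22 GB of hash payload

The remaining gap up to the observed ~2.8 GB is plausibly load-time copies and interpreter overhead, though that is a guess.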
I just upgraded the RAM to 16 GB and still got the same error. What should I do?
from audfprint.
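One possible workaround until the fix mentioned below is picked up (my suggestion, not advice given in the thread): split long queries into shorter chunks and match them on a single core, which keeps the packed time range per query and the per-process memory down. File names here are placeholders:

import glob
import subprocess

# Split the long query into 10-minute chunks with ffmpeg's segment muxer.
subprocess.check_call(['ffmpeg', '-i', 'query.mp3', '-f', 'segment',
                       '-segment_time', '600', '-c', 'copy', 'chunk%03d.mp3'])

# Match each chunk separately; --ncores 1 caps per-process memory.
for chunk in sorted(glob.glob('chunk*.mp3')):
    subprocess.check_call(['python', 'audfprint.py', 'match',
                           '--dbase', '/afp/mongol_0127.db',
                           '--ncores', '1', chunk])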
- DB creation args:
adsa = ['audfprint.py', 'new', '--dbase', songs_db_file, '--density', '40', '--skip-existing', '--maxtime', '32768', '--ncores', '4', '--list', tmpFile]
- nshifts is not set (i.e. 0). DB: 8627 files (85002334 hashes), file size: 243M.
- The query file is 1 hour long; I still got the same error.
- Full error log:
...........................................................................
/afp/bin/audfprint.py in main(argv=['audfprint.py', 'match', '--dbase', '/afp/mongol_0127.db', '--match-win', '2', '--min-count', '200', '--max-matches', '100', '--sortbytime', '--opfile', '/afp/tmp/songs_.lst', '--ncores', '4', '--find-time-range', '--list', '/afp/tmp/run_.lst'])
463 do_cmd_multiproc(cmd, analyzer, hash_tab, filename_iter,
464 matcher, args['--precompdir'],
465 precomp_type, report,
466 skip_existing=args['--skip-existing'],
467 strip_prefix=args['--wavdir'],
--> 468 ncores=ncores)
ncores = 4
469 else:
470 do_cmd(cmd, analyzer, hash_tab, filename_iter,
471 matcher, args['--precompdir'], precomp_type, report,
472 skip_existing=args['--skip-existing'],
...........................................................................
/afp/bin/audfprint.py in do_cmd_multiproc(cmd='match', analyzer=<audfprint_analyze.Analyzer object>, hash_tab=<hash_table.HashTable object>, filename_iter=<generator object filename_list_iterator>, matcher=<audfprint_match.Matcher object>, outdir='.', type='hashes', report=<function report>, skip_existing=False, strip_prefix='', ncores=4)
250 msgslist = joblib.Parallel(n_jobs=ncores)(
251 # Would use matcher.file_match_to_msgs(), but you
252 # can't use joblib on an instance method
253 joblib.delayed(matcher_file_match_to_msgs)(matcher, analyzer,
254 hash_tab, filename)
--> 255 for filename in filename_iter
filename_iter = <generator object filename_list_iterator>
256 )
257 for msgs in msgslist:
258 report(msgs)
259
...........................................................................
/usr/local/lib/python2.7/dist-packages/joblib/parallel.py in __call__(self=Parallel(n_jobs=4), iterable=<generator object <genexpr>>)
763 if pre_dispatch == "all" or n_jobs == 1:
764 # The iterable was consumed all at once by the above for loop.
765 # No need to wait for async callbacks to trigger to
766 # consumption.
767 self._iterating = False
--> 768 self.retrieve()
self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=4)>
769 # Make sure that we get a last message telling us we are done
770 elapsed_time = time.time() - self._start_time
771 self._print('Done %3i out of %3i | elapsed: %s finished',
772 (len(self._output), len(self._output),
---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
MemoryError Fri Jan 27 11:12:56 2017
PID: 25956 Python 2.7.12: /usr/bin/python
...........................................................................
/usr/local/lib/python2.7/dist-packages/joblib/parallel.py in __call__(self=<joblib.parallel.BatchedCalls object>)
126 def __init__(self, iterator_slice):
127 self.items = list(iterator_slice)
128 self._size = len(self.items)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
func = <function matcher_file_match_to_msgs>
args = (<audfprint_match.Matcher object>, <audfprint_analyze.Analyzer object>, <hash_table.HashTable object>, '/afp/precomp/input_file.afpt')
kwargs = {}
self.items = [(<function matcher_file_match_to_msgs>, (<audfprint_match.Matcher object>, <audfprint_analyze.Analyzer object>, <hash_table.HashTable object>, '/afp/precomp/input_file.afpt'), {})]
132
133 def __len__(self):
134 return self._size
135
...........................................................................
/afp/bin/audfprint.py in matcher_file_match_to_msgs(matcher=<audfprint_match.Matcher object>, analyzer=<audfprint_analyze.Analyzer object>, hash_tab=<hash_table.HashTable object>, filename='/afp/precomp/input_file.afpt')
227 pr[core].join()
228
229
230 def matcher_file_match_to_msgs(matcher, analyzer, hash_tab, filename):
231 """Cover for matcher.file_match_to_msgs so it can be passed to joblib"""
--> 232 return matcher.file_match_to_msgs(analyzer, hash_tab, filename)
matcher.file_match_to_msgs = <bound method Matcher.file_match_to_msgs of <audfprint_match.Matcher object>>
analyzer = <audfprint_analyze.Analyzer object>
hash_tab = <hash_table.HashTable object>
filename = '/afp/precomp/input_file.afpt'
233
234 def do_cmd_multiproc(cmd, analyzer, hash_tab, filename_iter, matcher,
235 outdir, type, report, skip_existing=False,
236 strip_prefix=None, ncores=1):
...........................................................................
/afp/bin/audfprint_match.py in file_match_to_msgs(self=<audfprint_match.Matcher object>, analyzer=<audfprint_analyze.Analyzer object>, ht=<hash_table.HashTable object>, qry='/afp/precomp/input_file.afpt', number=None)
330 return (rslts[:self.max_returns, :], durd, len(q_hashes))
331
332 def file_match_to_msgs(self, analyzer, ht, qry, number=None):
333 """ Perform a match on a single input file, return list
334 of message strings """
--> 335 rslts, dur, nhash = self.match_file(analyzer, ht, qry, number)
rslts = undefined
dur = undefined
nhash = undefined
self.match_file = <bound method Matcher.match_file of <audfprint_match.Matcher object>>
analyzer = <audfprint_analyze.Analyzer object>
ht = <hash_table.HashTable object>
qry = '/afp/precomp/input_file.afpt'
number = None
336 t_hop = analyzer.n_hop/float(analyzer.target_sr)
337 if self.verbose:
338 qrymsg = qry + (' %.1f '%dur) + "sec " + str(nhash) + " raw hashes"
339 else:
...........................................................................
/afp/bin/audfprint_match.py in match_file(self=<audfprint_match.Matcher object>, analyzer=<audfprint_analyze.Analyzer object>, ht=<hash_table.HashTable object>, filename='/afp/precomp/input_file.afpt', number=None)
321 numberstring = ""
322 print time.ctime(), "Analyzed", numberstring, filename, "of", \
323 ('%.3f'%durd), "s " \
324 "to", len(q_hashes), "hashes"
325 # Run query
--> 326 rslts = self.match_hashes(ht, q_hashes)
rslts = undefined
self.match_hashes = <bound method Matcher.match_hashes of <audfprint_match.Matcher object>>
ht = <hash_table.HashTable object>
q_hashes = [(2, 41037), (2, 41104), (2, 41740), (2, 245764), (2, 247569), (2, 247688), (6, 247565), (6, 247631), (6, 247684), (9, 532109), (9, 532234), (9, 532427), (10, 372496), (10, 372617), (10, 372683), (10, 696590), (10, 697361), (10, 698774), (10, 983322), (10, 986129), ...]
327 # Post filtering
328 if self.sort_by_time:
329 rslts = rslts[(-rslts[:, 2]).argsort(), :]
330 return (rslts[:self.max_returns, :], durd, len(q_hashes))
...........................................................................
/afp/bin/audfprint_match.py in match_hashes(self=<audfprint_match.Matcher object>, ht=<hash_table.HashTable object>, hashes=[(2, 41037), (2, 41104), (2, 41740), (2, 245764), (2, 247569), (2, 247688), (6, 247565), (6, 247631), (6, 247684), (9, 532109), (9, 532234), (9, 532427), (10, 372496), (10, 372617), (10, 372683), (10, 696590), (10, 697361), (10, 698774), (10, 983322), (10, 986129), ...], hashesfor=None)
276 bestids, rawcounts = self._best_count_ids(hits, ht)
277
278 #log("len(rawcounts)=%d max(bestcountsixs)=%d" %
279 # (len(rawcounts), max(bestcountsixs)))
280 if not self.exact_count:
--> 281 results = self._approx_match_counts(hits, bestids, rawcounts)
results = undefined
self._approx_match_counts = <bound method Matcher._approx_match_counts of <audfprint_match.Matcher object>>
hits = array([[ 335, 3861, 41037, 2],
... 8163, -147252, 966850, 151424]], dtype=int32)
bestids = array([15350, 15226, 15289, 15116, 14836, 15300,... 15112, 15299, 15322, 15196, 15037], dtype=int32)
rawcounts = array([ 9946, 5445, 10457, 11158, 12669, 11451,... 7676, 12615, 7222, 8288, 6512, 9495, 9505])
282 else:
283 results = self._exact_match_counts(hits, bestids, rawcounts,
284 hashesfor)
285 # Sort results by filtered count, descending
...........................................................................
/afp/bin/audfprint_match.py in _approx_match_counts(self=<audfprint_match.Matcher object>, hits=array([[ 335, 3861, 41037, 2],
... 8163, -147252, 966850, 151424]], dtype=int32), ids=array([15350, 15226, 15289, 15116, 14836, 15300,... 15112, 15299, 15322, 15196, 15037], dtype=int32), rawcounts=array([ 9946, 5445, 10457, 11158, 12669, 11451,... 7676, 12615, 7222, 8288, 6512, 9495, 9505]))
229 mintime = np.amin(alltimes)
230 alltimes -= mintime
231 nresults = 0
232 # Hash IDs and times together, so only a single bincount
233 timebits = max(1, encpowerof2(np.amax(alltimes)))
--> 234 allbincounts = np.bincount((allids << timebits) + alltimes)
allbincounts = undefined
allids = array([ 335, 409, 673, ..., 14474, 13210, 8163])
timebits = 18
alltimes = array([155263, 151879, 153009, ..., 1895, 1433, 4150])
235 min_time = 0
236 max_time = 0
237 for urank, (id, rawcount) in enumerate(zip(ids, rawcounts)):
238 # Make sure id is an int64 before shifting it up.
MemoryError:
______________________
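For scale: plugging the values shown in this traceback into the failing call, the bincount output alone is enormous (a rough estimate, not output from the run):

import numpy as np

# Values from the traceback: timebits = 18, largest id in bestids ~ 15350.
timebits = 18
max_id = 15350

# np.bincount allocates an output array of length max(input) + 1, so packing
# ids and times as (id << timebits) + time forces a gigantic allocation:
out_len = (max_id + 1) << timebits                     # ~4.0e9 bins
gb = out_len * np.dtype(np.intp).itemsize / 2.0 ** 30  # 8-byte counts on 64-bit
print('bincount output needs ~%.0f GB' % gb)           # ~30 GB, hence MemoryError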
from audfprint.
Note that the np.bincount logic was changed in e64d933 to drastically reduce the peak memory usage, so this should now be fixed.
from audfprint.
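For intuition about how such a reduction can work (a generic illustration, not necessarily what e64d933 actually does): remapping the ids that occur in the hits to a dense 0..K-1 range before packing bounds the bincount length by the number of distinct candidate ids rather than by the largest raw id.

import numpy as np

# Toy hit data shaped like the arrays in the traceback above.
allids = np.array([335, 409, 673, 14474, 13210, 8163])
alltimes = np.array([155263, 151879, 153009, 1895, 1433, 4150])
timebits = 18

# Dense remap: np.unique's return_inverse gives each raw id a 0..K-1 index.
uniq_ids, dense = np.unique(allids, return_inverse=True)
counts = np.bincount((dense.astype(np.int64) << timebits) + alltimes)

# len(counts) is now ~1.3e6; with the raw ids it would be ~3.8e9.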
Related Issues (20)
- Incorrect time range
- Incorrect Time range.
- Convert .afpk files to mp3?
- Problem with "spreadpeaksinvector"
- Reduce memory usage
- Use audfprint as a module
- How to increase bits for storing IDs and timestamps?
- illustrate
- Scan every folder in every fingerprint base named as folder
- UNICODE characters ERROR
- Show more than one matched name in results.
- Can this algorithm load the historical features into memory first to improve matching speed? I don't know how to modify the base code.
- If there are 1 million songs (each about 3 minutes long), matching is very slow; how can it be optimized?
- The hashes audfprint generates are not continuous in time, so ranking candidates by matched-hash count does not put the original (climax) clip first.
- Some matches are inaccurate: a longer song can accumulate more matching hashes than a shorter one, skewing the count-based ranking; how can this be optimized?
- Output matching differs between Windows and Linux: with an identical ~340 MB database and an identical 100-file query, a Windows machine finds significantly more matches than a Raspberry Pi, both on Python 3.9.
- Question about concatenating afpk files
- How to avoid a big % Dropped
- Can someone take on audfprint-gui for audfprint, or create a new GUI for it?
- Ability to split pklzs into smaller sizes