stevenhalim / cpbook-code Goto Github PK

CP4 Free Source Code Project (C++17, Java11, Python3 and OCaml)

C++ 29.68% Java 33.77% Python 18.16% HTML 0.79% OCaml 17.60%

cpbook-code's Introduction

cpbook-code

CP4 Free Source Code Project (cpp/gnu++17, java/java11, py/python3, and ml/ocaml).

This repo is expected to be much more up-to-date than CP4 itself, that will go out to public on 19 July 2020.

All code in this repo are mostly (reasonably good, hopefully :) implementation of many well-known data structures and algorithms, so you are free to copy, adapt, use the code (in your next programming contest or any other coding project).

Note to Computer Science educators worldwide: please include all the code discussed here in your plagiarism checker system as "excluded". Please do NOT test your students to just (re-)code these basic data structures/algorithms verbatim as that is just reinventing the wheel. Please test them with higher order thinking questions/problems that use these code instead.

We will be thankful if you cite us (Steven Halim, Felix Halim, Suhendry Effendy) when you use code in this repo.

This license is probably the most suitable: https://opensource.org/licenses/UPL

cpbook-code's People

Contributors

Stargazers

Watchers

Forkers

pullasunil ccebinger jackhamper nguyenmvo pdnm fanghuix raivatshah sindhu819 rrp212 tjuxjh suresh1784 donjar steven-mathew stormwild nianfei97 nicoleyu0623 rrtheonlyone chrismaxheart zj-cs2103 plty mntabassm heiseish foxtrot9 rishabh-malik xsot stevenwjy khawyewonn ankurtrapasiya alishakiba imjching arupjyoti mfhs littlewhitewhite mdturin kkapdolee zkchang97 zyleet isinkaye1 johrishruti rtan265 dheerajaraj zhp007 chandansitlani ll-pengfei xiazixi phoenixstar7 rohanmukhija j2jooooo dormperetz learnermaxrl techyajay davisv7 adsk2050 u-sync sriramvsharma tedlee1024 tpnguyen gsoni22 michaelshuoliu mpdo2017 boluvishal davindermaverick bushihuazai iamshankhadeep asarolia adammubarok zeroming sat1001 sushant199 farheen2302 rubix982 allgebrist gabriel301 calsaviour nahin100 jitendra-maurya0 leosonus abdelhak4 lancehaoh ks3rr zukarusan benchan911 kbattish gottaboy sipln liucy1898 gbassole billchee123 bergolho debodirno nambcn97 shashisingh arvindugale saiyashwanth pandiwaw ajaysharma388 timbwa yamgent zidquocviet1 lm5lm5

cpbook-code's Issues

Improvement to Dinic's algorithm code

Instead of performing both bfs (initialize dist) and dfs (find augmenting path) from source, we can bfs from sink but still dfs from source. Doing so avoids exploring useless nodes that do not take us closer to sink while finding augmenting paths and a crude benchmark shows 10-20% runtime reduction on random graphs. This also makes dist more consistent with the labeling function in Push-relabel algorithm.

Rabin-Karp implementation runs in O(n log n) due to using extended euclidean algorithm for modInverse

Considering the following section in /ch6/string_matching.cpp, we call modInverse for every check (we check O(n) possible sub-strings of length m). The comment for hash_fast states it is O(1) but since the extended euclidean algorithm is O(log m), then our total time for string matching is O(n log m).

cpbook-code/ch6/string_matching.cpp

Lines 71 to 97 in c3fb85a

 int extEuclid(int a, int b, int &x, int &y) { // pass x and y by ref 

 int xx = y = 0; 

 int yy = x = 1; 

 while (b) { // repeats until b == 0 

 int q = a/b; 

 tie(a, b) = tuple(b, a%b); 

 tie(x, xx) = tuple(xx, x-q*xx); 

 tie(y, yy) = tuple(yy, y-q*yy); 

 } 

 return a; // returns gcd(a, b) 

 } 

 int modInverse(int b, int m) { // returns b^(-1) (mod m) 

 int x, y; 

 int d = extEuclid(b, m, x, y); // to get b*x + m*y == d 

 if (d != 1) return -1; // to indicate failure 

 // b*x + m*y == 1, now apply (mod m) to get b*x == 1 (mod m) 

 return (x+m)%m; // this is the answer 

 } 

 int hash_fast(int L, int R) { // O(1) hash of any substr 

 if (L == 0) return h[R]; // h is the prefix hashes 

 int ans = 0; 

 ans = ((h[R] - h[L-1]) % M + M) % M; // compute differences 

 ans = ((ll)ans * modInverse(Pow[L], M)) % M; // remove P[L]^-1 (mod M) 

 return ans; 

 }

We can fix this by pre-computing the inverse of each P[i] using the following implementation in O(log m + n) since we only use modInverse once.

class RollingHash {
   public:
    vi P, H;   // P[i] = p^i mod m, H[i] is the hash of prefix length i
    vi P_inv;  // P_inv[i] = p^(-i) mod m
    const int n;
    string T;
    const ll p, M;

    RollingHash(string _s, int _p = 131, int _M = (int)1e9 + 7)
        : n(_s.size()), T(_s), p(_p), M(_M) {
        PrepareP();
        computeRollingHash();
    }
    void PrepareP() {  // precompute P and P_inv
        P.assign(n, 0);
        P[0] = 1;
        for (int i = 1; i < n; i++) P[i] = (P[i - 1] * p) % M;

        P_inv.assign(n, 0);
        P_inv[n - 1] = modInverse(P[n - 1], M);
        for (int i = n - 2; i >= 0; i--) P_inv[i] = (P_inv[i + 1] * p) % M;
    }

    void computeRollingHash() {  // precompute H
        H.assign(n, 0);
        for (int i = 0; i < n; i++) {
            if (i != 0) H[i] = H[i - 1];
            H[i] = (H[i] + ((ll)T[i] * P[i]) % M) % M;
        }
    }

    int getHash(int l, int r) {  // get hash of substring [l, r]
        if (l == 0) return H[r];
        int ans = ((H[r] - H[l - 1]) % M + M) % M;
        ans = ((ll)ans * P_inv[l]) % M;
        return ans;
    }
};

// Returns a vector of indices of all occurrences of pattern in text
vi rabinKarp(string P, string T) {
    RollingHash P_rolling(P);
    RollingHash T_rolling(T);
    vi matches;

    int n = T.size(), m = P.size();
    int p_hash = P_rolling.getHash(0, m - 1);
    for (int i = 0; i <= n - m; i++) {
        if (p_hash == T_rolling.getHash(i, i + m - 1)) {  // match
            matches.push_back(i);
        }
    }
    return matches;
};

Note that using the original code gets TLE for kattis stringmatching while the modified can AC.

duplicate Ex_1.3.4.1/task11.py and task12.py (delete one)

Bit manip danger

from discord:

(what if j is something like a&b. Admittedly unlikely)
better to write (j) there, or even better just use a function (compilers these days good at inlining)

Geometry - Wrong order in result after sorting

In this main code,

cpbook-code/ch7/points_lines.java

Lines 174 to 180 in f8f34ba

 point[] P = new point[6]; 

 P[0] = new point(2, 2); 

 P[1] = new point(4, 3); 

 P[2] = new point(2, 4); 

 P[3] = new point(6, 6); 

 P[4] = new point(2, 6); 

 P[5] = new point(6, 5);

i changed into

point[] P = new point[7];
P[0] = new point(3.6, 4.5);
P[1] = new point(0, 2);
P[2] = new point(1.75, 6.75);
P[3] = new point(2.4, 3);
P[4] = new point(5.6, 5.8);
P[5] = new point(0.5, 1.5);
P[6] = new point(4.75, 2.1);

And the result after Arrays.sort(P) is

(0.00, 2.00)
(0.50, 1.50)
(1.75, 6.75)
(2.40, 3.00)
(3.60, 4.50)
(5.60, 5.80)
(4.75, 2.10)

Shouldn't the (4.75, 2.10) appear before (5.60, 5.80)?

Wrong allocation size lead to runtime error

cpbook-code/ch9/SparseTable.cpp

Lines 17 to 22 in 26fb737

 P2.assign(L2_n, 0); 

 L2.assign(1<<L2_n, 0); 

 for (int i = 0; i <= L2_n; ++i) { 

 P2[i] = (1<<i); // to speed up 2^i 

 L2[(1<<i)] = i; // to speed up log_2(i) 

 }

In line 17 we assign P2 a size of L2_n, implying we have the range [0,L2_n) available. Then we iterate i through [0,L2_n] and assign P2[i]. This leads to a problem when i = L2_n. This is a really hard to catch bug, as C++ usually allocate more size than necessary, but this very mistake has got me a runtime error in problem https://www.codechef.com/problems/TALCA?tab=statement

Error found in CP4 Book 1

Hi,

I couldn't find where to submit errors found in the book. I recently bought CP4 Book 1 and in the example for Max 2D Range Sum I believe there is an error in the cumulative array sum on page 175, table 3.3, figure b.

Original Array

 0 -2 -7  0
 9  2 -6  2
-4  1 -4  1
-1  8  0 -2

Cumulative Sum Array (In Book)

 0 -2 -9 -9 
 9  9 -4  2
 ...

I believe A[1][3] should be -2 and not 2.

Thanks!
JT

Bug in ch5/primes.cpp

In ch5/primes.cpp -- primeFactors(ll N):

while (PF*PF <= N) {
  while (N%PF == 0) { N /= PF; factors.push_back(PF); }
  PF = primes[++PF_idx];
}

This code might crash due to index-out-of-bound in PF = primes[++PF_idx]. Consider the case when PF_idx = primes.size()-1 (the last prime in primes), then when it goes to the problematic line, PF_idx becomes primes.size() which does not exist in PF.

This bug also happened in other functions as well, i.e., numPF(), numDiffPF(), sumPF(), numDiv(), sumDiv(), EulerPhi().

Incorrect in-place FFT Code (in CP4 Book)

In CP4 Page 517, code for an in-place FFT is written. However, it contains (multiple) errors. Here is a working version of the same function.

typedef complex<long double> cd;
const double PI = acos(-1.0);

int reverseBit(int x, int m) {  // m is the binary length A.size()-1
    int ret = 0;
    for (int k = 0; k < m; k++) {
        if (x & (1 << k)) ret |= (1 << (m - 1 - k));
    }
    return ret;
}

void FFT(vector<cd> &A) {  // evaulates A at the nth roots of unity for n = 2^k >= A.size()
    int m = 0;
    while ((1 << m) < (int)A.size()) m++;
    A.resize(1 << m, 0);                       // size of A should be a power of 2, resizes A
    for (int k = 0; k < (int)A.size(); k++) {  // sort to bit-reversal permutation
        if (k < reverseBit(k, m)) swap(A[k], A[reverseBit(k, m)]);
    }

    for (int n = 2; n <= (int)A.size(); n <<= 1) {
        for (int k = 0; 2 * k < n; k++) {
            // we are going to get the kth and k+n/2th element of each length n block
            cd x = cd(cos(2 * PI * k / n), sin(2 * PI * k / n));  // nth root of unity
            for (int j = 0; j < (int)A.size(); j += n) {  // apply to every sub-array of length n
                cd A0k = A[k + j];                        // previously computed
                cd A1k = A[k + j + n / 2];                // previously computed
                A[k + j] = A0k + x * A1k;
                A[k + j + n / 2] = A0k - x * A1k;
            }
        }
    }
}

Bug in ch9/SparseTable.cpp

In ch9/SparseTable.cpp

P2.assign(L2_n, 0);
L2.assign(1<<L2_n, 0);
for (int i = 0; i <= L2_n; ++i) {
  P2[i] = (1<<i);
  L2[(1<<i)] = i;
}

P1 is a vector and is assigned with L2_n elements of 0 (from index 0 to L2_n-1), but in the iteration, it accesses P2[i = L2_n] which does not exist.

AVL tree can get imbalanced

Example of Problem

AVL* A = new AVL();

int n = 9;
int arr[] = {7, 4, 8, 2, 5, 9, 1, 3, 6};
for (int i = 0; i < n; i++) {
    A->insert(arr[i]);
}
A->remove(9);
BSTVertex* L = A->root->left; // make root public to check this
printf("%d\n", L->key);
printf("%d\n", L->height);
printf("%d\n", L->right == NULL);

After insertion of the 9 elements, the tree looks like this:

After deletion of the 9, the tree looks like this:

The tree is imbalanced at the 4 node, as it has height 2 but the right child is NULL.

What causes the issue

In the above example, the tree after deleting 9, and before rebalancing looks like this:

The root is now imbalanced and needs to be rebalanced.
This is the relevant rebalancing code:

int balance = h(T->left) - h(T->right);
if (balance == 2) { // left heavy
    int balance2 = h(T->left->left) - h(T->left->right);
    if (balance2 == 1) {
        T = rotateRight(T);
    }
    else { // -1
        T->left = rotateLeft(T->left);
        T = rotateRight(T);
     }

In the example, balance2 is 0, so we enter the else case, which causes left rotation of left child followed by right rotation.
The tree after the left rotation of the left child:

The tree after the right rotation of the root (final result):

Suggested fix

Change the if condition in the rebalancing to

if (balance2 >= 0) {

to trigger the right rotation for the 0 case.
This yields the following, correctly balanced tree:

The same needs to be done for the symmetric case.

Minor issues about Heliocentric.cpp

cpbook-code/ch9/heliocentric.cpp

Lines 18 to 20 in 666991e

 int t = b; b = a%b; a = t; 

 t = xx; xx = x-q*xx; x = t; 

 t = yy; yy = y-q*yy; y = t;

use a temporary variable t to achieve multiple assignments. For better readability, the code can be written as follows.

tie(a, b) = tuple(b, a%b);
tie(x, xx) = tuple(xx, x-q*xx);
tie(y, yy) = tuple(yy, y-q*yy);

The downside is that it's slightly longer and requres C++17 for https://en.cppreference.com/w/cpp/language/class_template_argument_deduction. For C++11 and C++14, make_tuple can be used instead.

Fenwick Tree Correction

In cpbook-code/ch2/ourown/fenwicktree_ds.py line 16, the second j should be an i, not a j.

Fenwick Tree's select method gives incorrect answer

Sometimes the method won't return the correct value.

How to reproduce:

FenwickTree ft(10);
for (int i = 1; i <= 10; i++){
    ft.update(i,1);
}
for (int i = 1; i <= 10; i++){
    cout << ft.select(i) << "\n";
}

Expected output:

Obtained result:

Code execution: https://ideone.com/k5IMXN

Concise way to write the python io

Tks for your effort! Wonderful code!
Just a little improvement for python io.

class SamInput(object):
    def __init__(self):
        self.inp = []
        for i in sys.stdin:
            i = i.replace("\n", "")
            j = list(i.split())
            self.inp.append(j)

canbe writed as:

def __init__(self):
        self.inp=[line.split() for line in list(sys.stdin)]

😅

The code does not work properly

Hello. Looks like this code does not work properly.

cpbook-code/ch6/string_alignment.py

Line 1 in a49dd07

def main():

Consider this example
A = "horse"
B = "ros"

Actual result:
table[n][m] outputs 1

Expected result:
table[n][m] should output 3

Add License

Great tool and website!
Under which license is your code released? Apache 2.0?

LF and CR+LF

Currently some files use LF for line endings while some files use CR+LF for line endings. It would be good to convert all of them to LF.

Mistake at cpbook-code/ch3/dp/LIS.cpp

Tks your wonderful effort, great book!
But sorry some problem about the function int LIS(int i)
LIS is a special DP problem, need to check every element of the array. So a better approach is bottom-up that use tabulation.
But cpbook-code/ch3/dp/LIS.cpp build the DP using DAG to find the single-source longest path. This method only works for the case that the last element of the array is the max, and initial ans=1. But not for the case like {5 4 2 3 1}.
Because LIS is special that need to ensure every element would be recursively
checked!
Additional caution, if initial using ans=0 then, only the first element of the array is the min, the base case i==0 would be recursively visited for every check that should be recursively visited. So using ans=0 need to ensure the first element is the min of the arrary.

typo in sa_lcp.py

in this file it does not seem that we are using globals so in this code m is missing as a parameter (n is also missing but in main its defined as just len of the string)
def longest_common_substring(sa, lcp):
idx = 0
max_lcp = -1
for i in range(1, n):
if (sa[i] < m) != (sa[i-1] < m) and lcp[i] > lcs:
max_lcp = lcp[i]
idx = i
return (max_lcp, idx)

Area of polygon gives wrong answer

In polygon.cpp I think the shoelace formula is missing the a_n * b1 - b_n * a_1 term, and in the alternative area function is missing the triangle with vertices O, P[0], P[P.size() -1]. Am I correct in thinking this?

func clear bit is wrong

https://github.com/stevenhalim/cpbook-code/blob/26fb7376417799f8b84b504d46ea7c8c0cf71c29/ch2/lineards/bit_manipulation.py#L9C1-L9C1

Illegal C++ identifiers

This repository has a few C++ code that uses variable with name that begins with an underscore followed by an uppercase letter. However, this is technically undefined behavior in C++.

According to https://en.cppreference.com/w/cpp/language/identifiers,

the identifiers that begin with an underscore followed by an uppercase letter are reserved;

"Reserved" here means that the standard library headers #define or declare such identifiers for their internal needs, the compiler may predefine non-standard identifiers of that kind, and that name mangling algorithm may assume that some of these identifiers are not in use. If the programmer uses such identifiers, the behavior is undefined.

Possible bug in articulation.[cpp|py|...]

This condition doesn't check for node u's parent to see if it's a root node.

cpbook-code/ch4/traversal/articulation.cpp

Line 24 in c3fb85a

if (dfs_low[v] >= dfs_num[u]) // for articulation point

I think it should be something like this:
if (dfs_parent[u] != -1 && dfs_low[v] >= dfs_num[u])

Incorrect updating in Segment Tree?

cpbook-code/ch2/ourown/segmenttree_ds.cpp

Line 64 in fa53432

st[p] = (lsubtree <= rsubtree) ? st[l(p)] : st[r(p)];

I think that this line should just use conquer(lsubtree,rsubtree), as currently it doesn't use the lazy value if applicable, and only checks for the minimum rather than what the conquer function specifies

CP Book 4

Possible error found in CP4 book 2

Hi,
I've been looking at the KMP example on pg 335 in Chapter 6 of CP4 book 2.
I noticed that in the examples you seem to preprocess a table that is pattern size + 1,
so that the kmpSearch can reset correctly once a match is found ( j = b[j])
However the kmpPreprocess function seems to only iterate for i < m.
Assuming m is the size of the pattern string we won't fully fill out the table,
should the preprocessing iterate for i <= m ? Or is m here the size of the pattern+1?
Or maybe I am misunderstanding something. Any clarification is appreciated!

Thanks,
Anup

StopIteration Error for IO_in1

I tried to run this code and received a StopIteration error.

# IO_in1.txt
import sys
inputs = iter(sys.stdin.readlines())
TC = int(next(inputs))
for _ in range(TC):
    print(sum(map(int, next(inputs).split())))

Can you tell me how to prevent this traceback?

---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-221-4615b4523133> in <module>
      1 import sys
      2 inputs = iter(sys.stdin.readlines())
----> 3 TC = int(next(inputs))
      4 for _ in range(TC):
      5     print(sum(map(int, next(inputs).split())))

StopIteration:

Missing code translations

As of 13 July 2020, getting much better for java/py/ml
Let's reduce this as much as possible before 18 July 2020.

Missing Java
ch3/greedy/ballotboxes_UVa12390.java (direct conversion from cpp and using BufferedReader/PrintWriter still TLE...)
ch6/Trie.java
ch8/UVa10243.java (direct conversion from cpp and using BufferedReader/PrintWriter still TLE...)
ch9/mcmf.java (Felix has done this, check with him)

Missing Python
ch2/lineards/array_algorithms.py (still incomplete)
ch7/triangles.py (incomplete, too long)
ch7/polygon.py (incomplete, too long)

Missing OCaml
ch2/lineards/array_algorithms.ml (OCaml array operations)
ch3/greedy/grass_UVa10382.ml
ch5/factovisors_UVa10139.ml
ch4/traversal/toposort.ml
ch4/traversal/cyclecheck.ml
ch4/traversal/articulation.ml
ch4/hierholzer.ml
ch5/modInverse.ml
ch5/combinatorics.ml
ch6/Trie.ml
ch9/GaussianElimination.ml
ch9/mcmf.ml
ch9/LCA.ml

Also missing but in reality a low priority task
ch2/nonlineards/AVL.py (will be very tedious)
ch2/nonlineards/AVL.ml (will be very tedious)
ch3/greedy/ballotboxes_UVa12390.ml (probably TLE too)
ch5/UVa01230.ml (no OCaml in UVa)
ch7/UVa11817.ml (no OCaml in UVa)
ch8/UVa11195.java (probably TLE)
ch8/UVa11195.py (probably TLE)
ch8/UVa11212.java (probably TLE)
ch8/UVa11212.py (probably TLE)
ch8/UVa01099.java (probably TLE)
ch8/UVa01099.py (probably TLE)
ch8/UVa11065.py (probably TLE)
ch8/UVa10243.ml (no OCaml in UVa)
ch8/UVa11646.ml (no OCaml in UVa)
ch8/UVa01079.py (probably TLE)
ch9/UVa11616.ml (no OCaml in UVa)
ch9/UVa10181.py (probably TLE)
ch9/UVa10181.ml (no OCaml in UVa)

FTree.select throws IndexError: list index out of range

Hello.
Looks like there is a bug in the select function

cpbook-code/ch2/ourown/fenwicktree_ds.py

Line 36 in fa53432

if k > self.ft[i + p]:

Steps to reproduce:

ft = FTree([1,2,3,4,5])
ft.select(15) # this throws IndexError: list index out of range in line 36

Expected behavior:
According to CP4 book, the select function "finds the smallest index/key i so that the cumulative frequency in the range [1..i]>=k". So the function should return 6 because the smallest index (of FTree.ft array) where the cumulative frequency is at least 15 is 6

	int extEuclid(int a, int b, int &x, int &y) { // pass x and y by ref
	int xx = y = 0;
	int yy = x = 1;
	while (b) { // repeats until b == 0
	int q = a/b;
	tie(a, b) = tuple(b, a%b);
	tie(x, xx) = tuple(xx, x-q*xx);
	tie(y, yy) = tuple(yy, y-q*yy);
	}
	return a; // returns gcd(a, b)
	}

	int modInverse(int b, int m) { // returns b^(-1) (mod m)
	int x, y;
	int d = extEuclid(b, m, x, y); // to get bx + my == d
	if (d != 1) return -1; // to indicate failure
	// bx + my == 1, now apply (mod m) to get b*x == 1 (mod m)
	return (x+m)%m; // this is the answer
	}

	int hash_fast(int L, int R) { // O(1) hash of any substr
	if (L == 0) return h[R]; // h is the prefix hashes
	int ans = 0;
	ans = ((h[R] - h[L-1]) % M + M) % M; // compute differences
	ans = ((ll)ans * modInverse(Pow[L], M)) % M; // remove P[L]^-1 (mod M)
	return ans;
	}

	point[] P = new point[6];
	P[0] = new point(2, 2);
	P[1] = new point(4, 3);
	P[2] = new point(2, 4);
	P[3] = new point(6, 6);
	P[4] = new point(2, 6);
	P[5] = new point(6, 5);

	P2.assign(L2_n, 0);
	L2.assign(1<<L2_n, 0);
	for (int i = 0; i <= L2_n; ++i) {
	P2[i] = (1<<i); // to speed up 2^i
	L2[(1<<i)] = i; // to speed up log_2(i)
	}

	int t = b; b = a%b; a = t;
	t = xx; xx = x-q*xx; x = t;
	t = yy; yy = y-q*yy; y = t;