# %%
#1
import pandas as pd
# Column-oriented sample data: each key becomes a column and each list holds
# that column's values in row order.
details = dict(
    Name=['Ankit', 'Aishwarya', 'Shaurya', 'Shivangi'],
    Age=[23, 21, 22, 21],
    University=['BHU', 'JNU', 'DU', 'BHU'],
)
df = pd.DataFrame(data=details)
df  # bare expression: shown as the cell output when run in a notebook
# %%
#2
import pandas as pd
# Ten paired sample observations. The numbers are arbitrary, but both lists
# must keep the same length so they line up row by row.
data = {
    'Unemployment_Rate': [6.1, 5.8, 5.7, 5.7, 5.8, 5.6, 5.5, 5.3, 5.2, 5.2],
    'Stock_Index_Price': [1500, 1520, 1525, 1523, 1515, 1540, 1545, 1560, 1555, 1565],
}
# Passing the column names explicitly pins their order in the frame.
df = pd.DataFrame(data, columns=list(data))
print(df)  # show the frame that was just built
# Scatter chart of the stock index price against the unemployment rate.
df.plot.scatter(x='Unemployment_Rate', y='Stock_Index_Price')
# %%
#3
import numpy as np
# Print three labelled length-10 vectors: all zeros, all ones, and all tens
# (the tens are the ones vector scaled by 10).
print("An array of 10 zeros:", np.zeros(shape=10))
print("An array of 10 ones:", np.ones(shape=10))
print("An array of 10 tens:", 10 * np.ones(shape=10))
# %%
#4
import seaborn as sb
import matplotlib.pyplot as plt
# Pairwise relationship grid over the columns of the iris sample dataset.
d = sb.load_dataset(name='iris')
sb.pairplot(data=d)

# 10x2 matrix of random samples to feed the heatmap.
e = np.random.rand(10, 2)
plt.figure()  # optional: open a new figure so the heatmap is drawn separately
sb.heatmap(data=e)
# %%
#5
import numpy as np
# Integers 20, 21, ..., 88 (the stop value 89 is excluded).
v = np.arange(20, 89, 1)
print("Original vector:", v)
# Slice away index 0 and the final index, keeping everything in between.
print("All values except the first and last of the said vector:", v[1:v.size - 1])
# %%
#6
import numpy as np
x = np.array((3, 10, 20, 100, 200))
print("Original array:", x)
# nbytes is the element count multiplied by the bytes used per element.
print("Size of the memory occupied by the said array:", x.nbytes)
# %%
#9
import numpy as np
import pandas as pd
# Start from a plain one-dimensional NumPy array ...
np_array = np.array((10, 20, 30, 40, 50))
print("NumPy array :", np_array)

# ... and wrap it in a pandas Series (a default integer index is attached).
new_series = pd.Series(data=np_array)
print("Converted Pandas series : \n", new_series)
# %%
#10
import pandas as pd
# Two small frames that share the same column names and the same 'id' values.
left = pd.DataFrame(dict(
    id=[1, 2, 3],
    Name=['Alex', 'Amy', 'Allen'],
    subject_id=['sub1', 'sub2', 'sub4'],
))
right = pd.DataFrame(dict(
    id=[1, 2, 3],
    Name=['Billy', 'Brian', 'Bran'],
    subject_id=['sub2', 'sub4', 'sub3'],
))
# Join on 'id'; the other columns clash by name, so pandas suffixes them
# with _x (left) and _y (right).
print(pd.merge(left=left, right=right, on='id'))
# %%
#12
import pandas as pd
# 45 consecutive timestamps starting at 2020-01-01 (pandas' default step for
# date_range is one calendar day).
date_range = pd.date_range('2020-01-01', periods=45)
# Label fixed: the original printed the misspelling "perods".
print("Date range of periods 45:")
print(date_range)
# %%
#13
from datetime import date
# First and last calendar dates of the interval being measured.
f_date = date(year=2014, month=7, day=2)
l_date = date(year=2014, month=7, day=11)
# Subtracting two dates yields a timedelta; .days is the whole-day difference.
delta = l_date - f_date
print(delta.days)
# %%
#15
import pandas as pd
# Load the CSV from the current working directory; "iris.csv" must exist there.
x = pd.read_csv("iris.csv")
# Row slice: show only the first three records.
print(x[:3])
# %%
#16
# Variant 1: build a boolean array from mixed values; dtype=bool asks NumPy
# to convert every element to True/False.
bool_arr = np.array([1, 0.5, 0, None, 'a', '', True, False], dtype=bool)
print(bool_arr)
# or
# Variant 2: a 3x4 boolean array in which every entry is True.
# (The bare word "or" that separated the two variants was a SyntaxError; it is
# now a comment. bool_arr is rebound here, so variant 2 is what remains.)
bool_arr = np.ones((3, 4), dtype=bool)
print(bool_arr)
# %%
#17
def maxx(x, y):
    """Get the maximum of two items.

    Args:
        x: First item; must be comparable to ``y`` with ``>=``.
        y: Second item.

    Returns:
        ``x`` when ``x >= y`` (so a tie returns the first argument),
        otherwise ``y``.
    """
    return x if x >= y else y


# Bare expression: the result (5) is displayed only when run as a notebook cell.
maxx(1, 5)
# %%
#18
import numpy as np
# Print floats in plain fixed-point form (six decimals via the 'f' format
# spec) rather than scientific notation. This changes NumPy's global print
# options, so arrays printed later are affected as well.
np.set_printoptions(
    suppress=True,
    formatter={'float_kind': lambda value: format(value, 'f')},
)
the_array = np.array((3.74, 5162, 13683628846.64, 12783387559.86, 1.81))
print(the_array)
# %%
#21
import matplotlib.pyplot as plt
# x runs over 1..49; y is three times x, so the plot is a straight line.
X = range(1, 50)
Y = [3 * value for value in X]

print("Values of X:")
print(*X)  # unpack so the values print space-separated on one line
print("Values of Y (thrice of X):")
print(Y)

# Draw the line, then label the axes and title the figure before showing it.
plt.plot(X, Y)
plt.xlabel('x - axis')
plt.ylabel('y - axis')
plt.title('Draw a line.')
plt.show()
# %%
#22
import numpy as np
# 10x10 grid of random integers drawn from 0..9 (the upper bound 10 is excluded).
arr = np.random.randint(0, 10, (10, 10))
print(arr)


def rel_freq(x, data=None):
    """Return the relative frequency of a value within an array.

    Args:
        x: The value to look for.
        data: Optional array-like to search. When omitted, the module-level
            ``arr`` is used, which matches the original call style.

    Returns:
        A two-item list ``[x, frequency]`` where ``frequency`` is the number
        of elements equal to ``x`` divided by the total element count
        (``0.0`` for an empty input).
    """
    values = arr if data is None else np.asarray(data)
    if values.size == 0:
        return [x, 0.0]
    # Count the matching elements. The original used len(np.where(...)),
    # which is the length of the index tuple (one entry per dimension), not
    # the number of matches.
    return [x, np.count_nonzero(values == x) / values.size]


print(rel_freq(4), rel_freq(5))
# %%
#23
import statistics
# Sample scores to average.
spiList = [
    5.55, 5.72, 7.3, 7.75,
    8.4, 9, 8.8, 8.2,
]
# Arithmetic mean of the list, computed by the standard library.
print(statistics.mean(spiList))
# %%
#24
import matplotlib.pyplot as plt
import seaborn as sns
iris = sns.load_dataset("iris")
# Each pair is (x column, y column) for one scatter series.
field_1 = ["sepal_length", "sepal_width"]
field_2 = ["petal_length", "petal_width"]
# Draw the sepal series first, then the petal series, on the same axes.
for x_col, y_col in (field_1, field_2):
    plt.scatter(x=iris[x_col], y=iris[y_col])
# %%
#25
from numpy.random import randn
from numpy.random import seed
from scipy.stats import pearsonr
# seed random number generator (fixed seed so the run is reproducible)
seed(1)
# prepare data: data1 is normal noise scaled by 20 and shifted by 100;
# data2 is data1 plus independent noise, so the two are positively correlated
data1 = 20 * randn(1000) + 100
data2 = data1 + (10 * randn(1000) + 50)
# calculate Pearson's correlation; the second value returned by pearsonr
# (the p-value) is not needed here
corr, _ = pearsonr(data1, data2)
print('Pearsons correlation: %.3f' % corr)