My purpose in learning Python is mainly for numeric and scientific applications, such as AI deep learning. So my notes will focus mainly on using Python for these types of applications.
The code below demonstrates a little about Python, such as variables and types; lists; subsetting and slicing lists; adding, removing, and copying lists; and some of its built-in functions and built-in object methods. It then demonstrates how to use the NumPy package, specifically the NumPy array, some NumPy statistical functions, and how to generate your own sample data. You can run this code online, and I recommend using repl.it.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python variables and types
# Say we want to calculate bmi = 703 x weight (lbs) / height (in) ^ 2.
weight = 150
height = 70
# Use the variables instead of repeating their literal values, so changing
# weight or height above also changes bmi.
bmi = 703 * (weight / (height ** 2))  # ** is the operator to calculate powers
print(bmi)  # prints value of bmi on the output console
print(type(weight))  # type is int
print(type(height))  # type is int
print(type(bmi))  # type is float (/ is true division in Python 3)
# Double and single quotes both create the same str value.
string = "Hello, World!"
string = 'Hello, World!'
boolean = True
print(type(string))  # type is str
print(type(boolean))  # type is bool
# Python lists
# Say we want to list the GTA 2016 population in millions.
gta_pop = [2.7, 0.6, 1.4, 1.1, 0.5]
# You can have mixed types in the list.
gta_pop = [
    "toronto", 2.7, "durham", 0.6, "peel", 1.4,
    "york", 1.1, "halton", 0.5,
]
# You can have lists inside the list.
gta_pop = [
    ["toronto", 2.7],
    ["durham", 0.6],
    ["peel", 1.4],
    ["york", 1.1],
    ["halton", 0.5],
]
print(gta_pop)
print(type(gta_pop))  # type is list

# Subsetting or indexing lists
# Index 0 points to the first element.
print(gta_pop[0])  # prints the toronto entry: ['toronto', 2.7]
print(gta_pop[2])  # prints the peel entry: ['peel', 1.4]
# Index -1 points to the last element, so you can index the list backwards
# starting from the last element.
print(gta_pop[-1])  # prints the halton entry: ['halton', 0.5]
print(gta_pop[-3])  # prints the peel entry: ['peel', 1.4]

# Slicing lists - selecting multiple elements in the list
# Note that the ending index is exclusive.
print(gta_pop[1:3])  # prints the durham and peel entries
print(gta_pop[2:])  # prints the peel, york, and halton entries
print(gta_pop[:3])  # prints the toronto, durham, and peel entries

# Adding and removing list elements
del gta_pop[4]  # removes the halton entry (del is a statement, not a function)
print(gta_pop)
gta_pop = gta_pop + [["halton", 0.5]]  # builds a new list with halton added
# Alternatively, gta_pop.append(["halton", 0.5]) adds the entry in place.
# Note the single pair of brackets: append() takes the element itself, not a
# list wrapping it.
print(gta_pop)

# Copying lists
# Both forms below create a new outer list, so adding or removing elements in
# the copy does not affect gta_pop. The copy is shallow: the inner
# [name, population] lists are still shared with gta_pop. Use copy.deepcopy()
# when the inner lists must be independent as well.
gta_pop_copy = list(gta_pop)
gta_pop_copy = gta_pop[:]  # or use slicing
print(gta_pop_copy)
# Python built-in functions
print(max([1, 2, 3, 4, 5]))  # prints 5
print(round(1.2345, 2))  # prints 1.23
help(max)  # shows the documentation for the max() function

# Python built-in object methods
mylist = [1, 2, 2, 3, 4, 4, 4, 5]
# count() returns how many times a value occurs in the list.
print(mylist.count(4))  # prints 3 as there are 3 occurrences of element 4
# index() returns the position of the first occurrence of a value.
print(mylist.index(2))  # prints 1, the index of the first occurrence of 2
mystr = "rodan"
print(mystr.capitalize())  # prints Rodan
# Python packages
# Import the whole NumPy package; its names are then used as numpy.<name>.
import numpy
print(numpy.array([1, 2, 3]))  # prints [1 2 3]
# Import one specific function from the package, bound to the alias nparray.
from numpy import array as nparray
print(nparray([4, 5, 6]))  # prints [4 5 6]
# NumPy array is an alternative to Python list.
# It provides calculations over entire arrays at once.
# It is easy and fast and specifically created for data science.
# It can only contain a single type.
# Say you have 2 datasets containing the weight and height of 5 people.
npa_weight = nparray([150, 165, 134, 210, 110])
npa_height = nparray([70, 68, 65, 72, 61])
# The arithmetic is applied element by element, giving one bmi per person.
npa_bmi = 703 * (npa_weight / (npa_height ** 2))
print(npa_bmi)  # prints [ 21.52040816 25.08542388 22.29633136 28.47800926 20.78204784]
# Comparing an array with a number gives an array of booleans, which can in
# turn be used as an index to select only the matching elements.
print(npa_bmi > 22)  # prints [False True True True False]
print(npa_bmi[npa_bmi > 22])  # prints [ 25.08542388 22.29633136 28.47800926]
print(type(npa_weight))  # prints <class 'numpy.ndarray'>
npa_2d = nparray([
    [150, 165, 134, 210, 110],
    [70, 68, 65, 72, 61],
])
print(npa_2d)
print(npa_2d.shape)  # prints (2, 5) - 2 rows and 5 columns

# NumPy array subsetting
print(npa_2d[0])  # prints [150 165 134 210 110], the first row
print(npa_2d[0][2])  # prints 134, the third element of the first row
print(npa_2d[0, 2])  # same as above

# NumPy array slicing
print(npa_2d[:, 1:3])  # prints [[165 134] [ 68 65]]: columns 1 and 2 of every row
print(npa_2d[0, :])  # prints [150 165 134 210 110], the entire first row

# NumPy statistics
print(numpy.mean(npa_weight))  # prints 153.8
print(numpy.median(npa_weight))  # prints 150.0
print(numpy.std(npa_weight))  # prints approximately 33.4926857687
print(numpy.mean(npa_height))  # prints 67.2
print(numpy.std(npa_height))  # prints approximately 3.86781592116
# NumPy generate data
# numpy.random.normal() takes the mean, the standard deviation, and the number
# of samples to draw. The values are random, so the outputs shown in the
# comments below are examples only and differ on every run.
n_samples = 10  # number of samples drawn for each measurement
# numpy.round(..., 2) keeps two decimal places.
gd_weight = numpy.round(numpy.random.normal(153.8, 33.50, n_samples), 2)
print(gd_weight)  # e.g. [ 130.67 147.44 153.69 104.84 137.85 164.96 150.62 153.65 194.35 229.11]
gd_height = numpy.round(numpy.random.normal(67.2, 3.87, n_samples), 2)
print(gd_height)  # e.g. [ 72.44 60.82 71.27 70.61 66.06 67.12 68.33 67.81 61.84 74.19]
# numpy.column_stack() combines the weight and height of each sample into one
# row, giving an array with one row per sample and two columns.
gd_wxh = numpy.column_stack((gd_weight, gd_height))
print(gd_wxh)  # e.g.
# [[ 130.67 72.44]
#  [ 147.44 60.82]
#  [ 153.69 71.27]
#  [ 104.84 70.61]
#  [ 137.85 66.06]
#  [ 164.96 67.12]
#  [ 150.62 68.33]
#  [ 153.65 67.81]
#  [ 194.35 61.84]
#  [ 229.11 74.19]]
Additional References: