Libraries
NumPy
NumPy (Numerical Python) is a fundamental library for scientific computing in Python.
1D Matrix
import numpy as np
# Create arrays
arr = np.array([1, 2, 3, 4, 5])
arr2 = np.array([6, 7, 8, 9, 10])

# Array Creation Functions
np.zeros(5)           # [0., 0., 0., 0., 0.] (float by default)
np.ones(5)            # [1., 1., 1., 1., 1.] (float by default)
np.arange(0, 10, 2)   # [0, 2, 4, 6, 8]
np.linspace(0, 1, 5)  # 5 evenly spaced numbers from 0 to 1

# Basic Statistics
arr.mean()      # Average
arr.std()       # Standard deviation
arr.min()       # Minimum value
arr.max()       # Maximum value
arr.sum()       # Sum of elements
np.median(arr)  # Median value

# Array Operations
arr + 2         # Add to each element
arr * 2         # Multiply each element
np.sqrt(arr)    # Square root
np.square(arr)  # Square each element
arr + arr2      # Element-wise addition
arr * arr2      # Element-wise multiplication

# Array Information
arr.shape  # Dimensions
arr.size   # Number of elements
arr.dtype  # Data type

# Indexing & Slicing
arr[0]    # First element
arr[-1]   # Last element
arr[1:4]  # Elements from index 1 to 3
arr[::2]  # Every second element

# Filtering
arr[arr > 2]                # Elements greater than 2
arr[(arr > 2) & (arr < 5)]  # Multiple conditions

# Reshaping & Transforming
arr.reshape(5, 1)            # Convert to 2D array
arr.repeat(2)                # Repeat each element
np.concatenate([arr, arr2])  # Join arrays

# Sorting
np.sort(arr)    # Sort array
arr.argsort()   # Get sorted indices
np.argmax(arr)  # Index of maximum value
np.argmin(arr)  # Index of minimum value

# Mathematical Operations
np.exp(arr)  # Exponential
np.log(arr)  # Natural logarithm
np.sin(arr)  # Sine
np.cos(arr)  # Cosine
2D Matrix
import numpy as np
# Create 2D arrays
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2 = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]])

# Creation Functions
np.zeros((3, 3))    # 3x3 matrix of zeros
np.ones((3, 3))     # 3x3 matrix of ones
np.eye(3)           # 3x3 identity matrix
np.full((3, 3), 5)  # 3x3 matrix filled with 5

# Basic Statistics
arr.mean()        # Average of all elements
arr.mean(axis=0)  # Column means
arr.mean(axis=1)  # Row means
arr.sum(axis=0)   # Column sums
arr.sum(axis=1)   # Row sums

# Matrix Operations
arr + arr2     # Element-wise addition
arr * arr2     # Element-wise multiplication
arr.dot(arr2)  # Matrix multiplication
arr.T          # Transpose
# `arr` is singular (row 3 = 2 * row 2 - row 1), so it has no inverse;
# the inverse is demonstrated on an invertible matrix instead.
invertible = np.array([[2, 1], [1, 3]])
np.linalg.inv(invertible)  # Inverse
np.linalg.det(arr)         # Determinant (approximately 0: arr is singular)

# Indexing & Slicing
arr[0, 0]      # First element
arr[1:]        # Second row onwards
arr[:, 1]      # Second column
arr[0:2, 0:2]  # 2x2 sub-matrix

# Shape Manipulation
arr.reshape(1, 9)       # Reshape to 1x9
arr.flatten()           # Convert to 1D array
np.vstack([arr, arr2])  # Vertical stack
np.hstack([arr, arr2])  # Horizontal stack

# Filtering
arr[arr > 5]       # Elements > 5
arr[arr % 2 == 0]  # Even elements

# Linear Algebra
eigenvals, eigenvecs = np.linalg.eig(arr)  # Eigenvalues & eigenvectors
u, s, vh = np.linalg.svd(arr)              # Singular Value Decomposition
np.linalg.matrix_rank(arr)                 # Matrix rank
np.trace(arr)                              # Matrix trace

# Broadcasting
arr + 1   # Add 1 to all elements
arr * 2   # Multiply all by 2
arr ** 2  # Square all elements

# Sorting
np.sort(arr, axis=0)  # Sort each column
np.sort(arr, axis=1)  # Sort each row

# Advanced Indexing
mask = arr > 5
arr[mask]            # Boolean indexing
arr[[0, 2], [1, 1]]  # Select specific elements
np.diag(arr)         # Get diagonal elements
np.triu(arr)         # Upper triangular matrix
np.tril(arr)         # Lower triangular matrix

# Statistical Operations
np.cov(arr)                        # Covariance matrix
np.corrcoef(arr)                   # Correlation matrix
np.percentile(arr, 75)             # 75th percentile
np.quantile(arr, [0.25, 0.75])     # Multiple quantiles
arr.var(axis=0)                    # Variance along columns
arr.std(axis=1)                    # Standard deviation along rows

# Matrix Manipulations
np.pad(arr, 1)            # Pad matrix with zeros
np.roll(arr, 1, axis=0)   # Roll elements along rows
np.rot90(arr)             # Rotate matrix 90 degrees
np.flip(arr, axis=0)      # Flip matrix vertically
np.flip(arr, axis=1)      # Flip matrix horizontally

# Advanced Linear Algebra
# solve() needs a non-singular coefficient matrix and a right-hand side.
coeffs = np.array([[3, 1], [1, 2]])
b = np.array([9, 8])
np.linalg.solve(coeffs, b)      # Solve linear equations (x = [2, 3])
np.linalg.norm(arr)             # Matrix norm
np.linalg.matrix_power(arr, 2)  # Matrix power
np.linalg.qr(arr)               # QR decomposition
# cholesky() requires a symmetric positive-definite matrix, which `arr` is not.
spd = np.array([[4, 2], [2, 3]])
np.linalg.cholesky(spd)         # Cholesky decomposition

# Element-wise Operations
np.maximum(arr, arr2)      # Element-wise maximum
np.minimum(arr, arr2)      # Element-wise minimum
np.clip(arr, 2, 7)         # Clip values between 2 and 7
np.round(arr, decimals=1)  # Round to 1 decimal
np.abs(arr)                # Absolute values

# Aggregation Functions
np.argmax(arr, axis=0)     # Index of max in each column
np.argmin(arr, axis=1)     # Index of min in each row
np.any(arr > 5, axis=0)    # Test if any element > 5
np.all(arr > 0, axis=1)    # Test if all elements > 0
np.count_nonzero(arr > 5)  # Count elements > 5

# Splitting and Combining
np.hsplit(arr, 3)          # Split horizontally
np.vsplit(arr, 3)          # Split vertically
np.tile(arr, (2, 2))       # Repeat array 2x2
np.repeat(arr, 2, axis=0)  # Repeat rows
np.repeat(arr, 2, axis=1)  # Repeat columns

# Random Sampling
np.random.choice(arr.flatten(), 5)  # Random sampling
# shuffle() works in place and, for a 2D array, only reorders the rows.
# A copy is shuffled here so `arr` stays intact for the snippets below.
shuffled = arr.copy()
np.random.shuffle(shuffled)         # Shuffle array in-place
np.random.permutation(arr)          # Shuffled copy of array

# Set Operations
np.unique(arr)             # Unique elements
np.intersect1d(arr, arr2)  # Intersection
np.union1d(arr, arr2)      # Union
np.setdiff1d(arr, arr2)    # Set difference

# Broadcasting with Row/Column Vectors
row_means = arr.mean(axis=1, keepdims=True)
arr - row_means  # Subtract mean from each row
col_sums = arr.sum(axis=0)
arr / col_sums   # Normalize columns
Random Number Generation
import numpy as np
# Set random seed for reproducibility
np.random.seed(42)

# Basic Random Generation
rand_uniform = np.random.rand(5)        # Uniform [0, 1)
rand_normal = np.random.randn(5)        # Standard normal distribution
rand_int = np.random.randint(0, 10, 5)  # Random integers in [0, 10) - upper bound is exclusive

# Common Distributions
normal = np.random.normal(loc=0, scale=1, size=1000)       # Normal (Gaussian)
uniform = np.random.uniform(low=0, high=10, size=1000)     # Uniform
poisson = np.random.poisson(lam=5, size=1000)              # Poisson
binomial = np.random.binomial(n=10, p=0.5, size=1000)      # Binomial
exponential = np.random.exponential(scale=1.0, size=1000)  # Exponential

# Sampling
data = np.array([1, 2, 3, 4, 5])
random_sample = np.random.choice(data, size=3, replace=False)                  # Without replacement
weighted_sample = np.random.choice(data, size=3, p=[0.1, 0.2, 0.4, 0.2, 0.1])  # With weights

# Random Matrices
rand_matrix = np.random.rand(3, 3)              # Uniform random matrix
normal_matrix = np.random.normal(0, 1, (3, 3))  # Normal random matrix

# Shuffling
arr = np.array([1, 2, 3, 4, 5])
np.random.shuffle(arr)                 # In-place shuffle
shuffled = np.random.permutation(arr)  # Return shuffled copy

# Random Generator Object (newer API)
rng = np.random.default_rng(42)
rng_normal = rng.normal(0, 1, 1000)
rng_choice = rng.choice(data, size=3)
Pandas
import pandas as pd
import numpy as np

# Creating DataFrames
df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': ['a', 'b', 'c'],
    'C': [1.1, 2.2, 3.3],
})
# From different sources
df_csv = pd.read_csv('file.csv')
df_excel = pd.read_excel('file.xlsx')
# from_dict() expects a mapping of column name -> values.
data = {'A': [1, 2, 3], 'B': ['a', 'b', 'c']}
df_dict = pd.DataFrame.from_dict(data)
df_numpy = pd.DataFrame(np.random.randn(3, 3))
# Basic Operations
df.head()      # First 5 rows
df.tail()      # Last 5 rows
df.info()      # DataFrame info
df.describe()  # Statistical summary
df.shape       # Dimensions
df.columns     # Column names
df.index       # Row indices
df.dtypes      # Data types

# Selection
df['A']               # Select column
df[['A', 'B']]        # Multiple columns
df.loc[0]             # Select row by label
df.iloc[0]            # Select row by position
df.loc[0:2, 'A':'C']  # Select by label range
df.iloc[0:2, 0:2]     # Select by position range

# Filtering
df[df['A'] > 2]                    # Simple condition
df[(df['A'] > 2) & (df['C'] < 3)]  # Multiple conditions
df.query('A > 2 and C < 3')        # Query method

# Missing Data
df.isna()                     # Check missing
df.dropna()                   # Drop missing
df.fillna(0)                  # Fill missing with 0
df[['A', 'C']].interpolate()  # Interpolate missing (numeric columns only)

# Grouping and Aggregation
# Column 'B' holds strings, so numeric aggregations must skip it explicitly.
df.groupby('A').mean(numeric_only=True)
df.groupby(['A', 'B']).sum()
df.groupby('A')[['C']].agg(['mean', 'sum'])

# Sorting
df.sort_values('A')         # Sort by column
df.sort_values(['A', 'B'])  # Sort by multiple columns
df.sort_index()             # Sort by index

# Data Transformation
df['D'] = df['A'] * 2      # New column
df.apply(lambda x: x * 2)  # Apply function
# DataFrame.applymap was renamed DataFrame.map in newer pandas; use whichever exists.
elementwise = df.map if hasattr(df, 'map') else df.applymap
elementwise(lambda x: str(x))  # Apply to each element
df['B'] = df['B'].astype(str)  # Change data type

# Merging and Joining
df1 = pd.DataFrame({'key': ['k0', 'k1'], 'left': [1, 2]})
df2 = pd.DataFrame({'key': ['k0', 'k1'], 'right': [3, 4]})
pd.merge(df1, df2, on='key')                  # Merge on key
pd.concat([df1, df2])                         # Concatenate
df1.set_index('key').join(df2.set_index('key'))  # Join on index

# Time Series
dates = pd.date_range('20230101', periods=6)
ts = pd.Series(np.random.randn(6), index=dates)
# An offset object avoids the deprecated 'M' string alias.
ts.resample(pd.offsets.MonthEnd()).mean()  # Monthly resampling
ts.shift(1)                                # Shift values
ts.rolling(2).mean()                       # Rolling average

# String Operations
df['B'].str.upper()            # Uppercase
df['B'].str.contains('a')      # Contains
df['B'].str.replace('a', 'x')  # Replace

# Statistical Methods
# numeric_only=True skips the string column, which would otherwise raise on pandas 2.x.
df.corr(numeric_only=True)  # Correlation
df.cov(numeric_only=True)   # Covariance
df.kurt(numeric_only=True)  # Kurtosis
df.skew(numeric_only=True)  # Skewness

# Data Cleaning
df.drop_duplicates()           # Remove duplicates
df.replace({'A': {1: 10}})     # Replace values
df.rename(columns={'A': 'X'})  # Rename columns
df.set_index('A')              # Set index

# Advanced Operations
df.pivot_table(  # Pivot table
    values='A',
    index='B',
    columns='C',
    aggfunc='mean',
)
df.melt(  # Unpivot
    id_vars=['A'],
    value_vars=['B', 'C'],
)
df.eval('D = A + C')  # Evaluate expression

# Export Data
df.to_csv('output.csv')
df.to_excel('output.xlsx')
df.to_json('output.json')

# Memory Optimization
df.memory_usage()                                    # Memory usage
df.select_dtypes(include=['int64']).astype('int32')  # Downcast

# Window Functions
# Window aggregations are restricted to the numeric columns.
numeric = df.select_dtypes(include='number')
numeric.expanding().mean()      # Expanding window
numeric.rolling(window=2).sum()  # Rolling window
numeric.ewm(alpha=0.5).mean()    # Exponential weighted

# Categorical Data
df['cat'] = pd.Categorical(df['B'])
df['cat'].cat.codes       # Category codes
df['cat'].cat.categories  # Category names

# Advanced Indexing
df.set_index(['A', 'B'])              # Multi-index
df.reset_index()                      # Reset index
df.set_index(['A', 'B']).swaplevel()  # Swap index levels (requires a MultiIndex)
Matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Basic Line Plot
x = np.linspace(0, 10, 100)
plt.plot(x, np.sin(x))
plt.title('Simple Line Plot')
plt.xlabel('x')
plt.ylabel('sin(x)')
plt.show()

# Multiple Lines
plt.plot(x, np.sin(x), label='sin')
plt.plot(x, np.cos(x), label='cos')
plt.legend()
plt.grid(True)
plt.show()

# Scatter Plot
# Separate names keep the sorted `x` grid intact for the line plots below.
x_pts = np.random.rand(50)
y_pts = np.random.rand(50)
plt.scatter(x_pts, y_pts, c='red', alpha=0.5)
plt.show()

# Bar Plot
categories = ['A', 'B', 'C', 'D']
values = [4, 3, 2, 1]
plt.bar(categories, values)
plt.show()

# Histogram
data = np.random.randn(1000)
plt.hist(data, bins=30)
plt.show()

# Subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(x, np.sin(x))
ax2.plot(x, np.cos(x))
plt.show()

# Customization
plt.figure(figsize=(10, 6))
plt.plot(x, np.sin(x), 'r--', linewidth=2, label='sin')
plt.plot(x, np.cos(x), 'b-.', linewidth=2, label='cos')
plt.title('Custom Plot', fontsize=14)
plt.xlabel('X axis', fontsize=12)
plt.ylabel('Y axis', fontsize=12)
plt.legend(fontsize=10)
plt.grid(True, linestyle='--', alpha=0.7)
plt.show()

# Advanced Plots
# Heatmap
data = np.random.rand(10, 10)
plt.imshow(data, cmap='hot')
plt.colorbar()
plt.show()

# 3D Plot
# NOTE: this import is unused by name; older Matplotlib releases needed it to
# register the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
x = y = np.linspace(-3, 3, 100)
X, Y = np.meshgrid(x, y)
Z = np.sin(np.sqrt(X**2 + Y**2))
ax.plot_surface(X, Y, Z)
plt.show()

# Pie Chart
sizes = [30, 20, 25, 15]
labels = ['A', 'B', 'C', 'D']
plt.pie(sizes, labels=labels, autopct='%1.1f%%')
plt.show()

# Box Plot
data = [np.random.normal(0, std, 100) for std in range(1, 4)]
plt.boxplot(data)
plt.show()

# Violin Plot
plt.violinplot(data)
plt.show()

# Save Plot
# Call savefig() before show(): once show() returns, the current figure is a
# new empty one, so saving afterwards writes a blank image.
plt.plot(x, np.sin(x))
plt.savefig('plot.png', dpi=300, bbox_inches='tight')

# Style Sheets
# Newer Matplotlib renamed the 'seaborn' style to 'seaborn-v0_8'; pick whichever is available.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)  # Other options: 'ggplot', 'dark_background'
# Animation
from matplotlib.animation import FuncAnimation

fig, ax = plt.subplots()
xdata, ydata = [], []
ln, = ax.plot([], [])


def init():
    """Set the axis limits before the first frame and return the line artist."""
    ax.set_xlim(0, 2*np.pi)
    ax.set_ylim(-1, 1)
    return ln,


def update(frame):
    """Append the point (frame, sin(frame)) to the line and return the artist."""
    xdata.append(frame)
    ydata.append(np.sin(frame))
    ln.set_data(xdata, ydata)
    return ln,


# Keep a reference to the animation object for as long as it should run.
ani = FuncAnimation(fig, update, frames=np.linspace(0, 2*np.pi, 128),
                    init_func=init, blit=True)
plt.show()
# Object-Oriented Interface
# Define the grid here so this snippet does not depend on an `x` left over from earlier ones.
x = np.linspace(0, 10, 100)
fig, ax = plt.subplots()
ax.plot(x, np.sin(x))
ax.set_title('OO Style Plot')
ax.set_xlabel('x')
ax.set_ylabel('sin(x)')
ax.grid(True)
plt.show()

# Multiple Plot Types
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10, 10))

# Scatter plot
ax1.scatter(np.random.rand(50), np.random.rand(50))
ax1.set_title('Scatter')

# Line plot
ax2.plot(x, np.sin(x))
ax2.set_title('Line')

# Bar plot
ax3.bar(['A', 'B', 'C'], [3, 2, 1])
ax3.set_title('Bar')

# Histogram
ax4.hist(np.random.randn(1000))
ax4.set_title('Histogram')

plt.tight_layout()
plt.show()
Seaborn
import matplotlib.pyplot as plt  # used below for figure sizing and saving
import seaborn as sns
import pandas as pd
import numpy as np

# Set style
sns.set_style("whitegrid")
sns.set_palette("husl")

# Sample data
tips = sns.load_dataset("tips")
flights = sns.load_dataset("flights")
iris = sns.load_dataset("iris")

# Distribution Plots
# Histogram and KDE
sns.histplot(data=tips, x="total_bill", kde=True)
sns.kdeplot(data=tips, x="total_bill", hue="sex")

# Box and Violin Plots
sns.boxplot(data=tips, x="day", y="total_bill")
sns.violinplot(data=tips, x="day", y="total_bill", hue="sex")

# Categorical Plots
# Bar plots
sns.barplot(data=tips, x="day", y="total_bill")
sns.countplot(data=tips, x="day")

# Strip and Swarm Plots
sns.stripplot(data=tips, x="day", y="total_bill", jitter=True)
sns.swarmplot(data=tips, x="day", y="total_bill")

# Relational Plots
# Scatter plots
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="sex")
sns.relplot(data=tips, x="total_bill", y="tip", col="sex")

# Line plots
# NOTE: newer seaborn releases deprecate `ci` here in favour of `errorbar=None`.
sns.lineplot(data=flights, x="year", y="passengers", ci=None)

# Regression Plots
# Simple regression
sns.regplot(data=tips, x="total_bill", y="tip")

# Complex regression
sns.lmplot(data=tips, x="total_bill", y="tip", col="sex", row="time", hue="smoker")

# Matrix Plots
# Correlation matrix
# Restrict to numeric columns: `tips` also has categorical columns, which
# DataFrame.corr() rejects on pandas 2.x.
corr = tips.select_dtypes(include="number").corr()
sns.heatmap(corr, annot=True, cmap="coolwarm")

# Pair plots
sns.pairplot(iris, hue="species")

# Joint plots
sns.jointplot(data=tips, x="total_bill", y="tip", kind="reg", height=7)

# Categorical relationships
# Point plots
sns.pointplot(data=tips, x="day", y="total_bill", hue="sex")

# Facet Grid
g = sns.FacetGrid(tips, col="time", row="smoker")
g.map(sns.scatterplot, "total_bill", "tip")

# Advanced Customization
# Custom figure size
plt.figure(figsize=(10, 6))
sns.boxplot(data=tips, x="day", y="total_bill")

# Custom color palette
sns.set_palette("Set2")
sns.scatterplot(data=iris, x="sepal_length", y="sepal_width", hue="species")

# Statistical Estimation
# Confidence intervals
sns.lmplot(data=tips, x="total_bill", y="tip", ci=95)

# Bootstrap resampling
sns.regplot(data=tips, x="total_bill", y="tip", n_boot=1000)

# Complex Visualizations
# Cluster map
sns.clustermap(corr, annot=True, cmap="coolwarm")

# Distribution visualization
sns.displot(data=tips, x="total_bill", col="time", row="sex", kind="kde")

# Multiple plot types
g = sns.JointGrid(data=tips, x="total_bill", y="tip")
g.plot_joint(sns.scatterplot)
g.plot_marginals(sns.histplot)

# Time series
sns.lineplot(data=flights, x="year", y="passengers", hue="month", style="month")

# Categorical plots with multiple variables
sns.catplot(data=tips, x="day", y="total_bill", kind="violin", hue="sex", split=True)

# Style Themes
# Available themes: darkgrid, whitegrid, dark, white, ticks
sns.set_theme(style="darkgrid")
sns.set_context("notebook", font_scale=1.5)

# Save plot
plt.savefig('seaborn_plot.png', dpi=300, bbox_inches='tight')
CRUD with SQLite
import sqlite3
from sqlite3 import Error
# Connect to database (creates if not exists)
def create_connection(db_file):
    """Open a SQLite connection to *db_file*.

    Args:
        db_file: Path of the database file passed to ``sqlite3.connect``.

    Returns:
        The open ``sqlite3.Connection``, or ``None`` if connecting raised a
        ``sqlite3.Error`` (the error is printed).
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        # sqlite_version is the version of the SQLite library itself;
        # sqlite3.version was the Python module's version and is deprecated.
        print(f"Connected to {db_file}, SQLite version: {sqlite3.sqlite_version}")
    except Error as e:
        print(f"Error: {e}")
    return conn
# Create table
def create_table(conn):
    """Create the ``users`` table on *conn* if it does not already exist.

    Args:
        conn: An open ``sqlite3.Connection``.

    A ``sqlite3.Error`` is caught and printed rather than raised.
    """
    # Adjacent literals are concatenated, so the statement text is unchanged.
    sql = (
        "CREATE TABLE IF NOT EXISTS users ( "
        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
        "name TEXT NOT NULL, "
        "email TEXT UNIQUE, "
        "age INTEGER, "
        "created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP );"
    )
    try:
        conn.execute(sql)
        print("Table created successfully")
    except Error as e:
        print(f"Error: {e}")
# Insert operations
def insert_user(conn, user):
    """Insert one row into ``users`` and commit.

    Args:
        conn: An open ``sqlite3.Connection``.
        user: A ``(name, email, age)`` sequence bound to the INSERT placeholders.

    Returns:
        The new row's id (``cursor.lastrowid``), or ``None`` if a
        ``sqlite3.Error`` occurred (the error is printed).
    """
    sql = '''INSERT INTO users(name, email, age) VALUES(?,?,?)'''
    try:
        # `with conn` commits on success and rolls back if the INSERT raises,
        # so a failed insert never leaves a transaction open.
        with conn:
            cur = conn.execute(sql, user)
    except Error as e:
        print(f"Error: {e}")
        return None
    print(f"Inserted user with id: {cur.lastrowid}")
    return cur.lastrowid
# Bulk insert
def insert_many_users(conn, users):
    """Insert several rows into ``users`` as one all-or-nothing batch.

    Args:
        conn: An open ``sqlite3.Connection``.
        users: An iterable of ``(name, email, age)`` sequences.

    A ``sqlite3.Error`` is caught and printed; in that case none of the rows
    in the batch are kept.
    """
    sql = '''INSERT INTO users(name, email, age) VALUES(?,?,?)'''
    try:
        # `with conn` rolls back on failure. Without it, rows inserted before
        # the failing one stay pending and get persisted by a later commit.
        with conn:
            cur = conn.executemany(sql, users)
        print(f"Inserted {cur.rowcount} users")
    except Error as e:
        print(f"Error: {e}")
# Read operations
def select_all_users(conn):
    """Fetch, print and return every row of ``users``.

    Args:
        conn: An open ``sqlite3.Connection``.

    Returns:
        A list of row tuples, or ``None`` if a ``sqlite3.Error`` occurred
        (the error is printed).
    """
    try:
        cur = conn.cursor()
        cur.execute("SELECT * FROM users")
        rows = cur.fetchall()
    except Error as e:
        print(f"Error: {e}")
        return None
    for row in rows:
        print(row)
    return rows
def select_user_by_id(conn, id):
    """Fetch, print and return the ``users`` row whose id equals *id*.

    Args:
        conn: An open ``sqlite3.Connection``.
        id: Primary-key value to look up (name kept for caller compatibility
            even though it shadows the builtin).

    Returns:
        The row tuple, or ``None`` when no row matches or a ``sqlite3.Error``
        occurred (the error is printed).
    """
    try:
        cur = conn.cursor()
        cur.execute("SELECT * FROM users WHERE id=?", (id,))
        row = cur.fetchone()
    except Error as e:
        print(f"Error: {e}")
        return None
    print(row)
    return row
# Update operations
def update_user(conn, user):
    """Update the name, email and age of one ``users`` row and commit.

    Args:
        conn: An open ``sqlite3.Connection``.
        user: A ``(name, email, age, id)`` sequence; the last item selects
            the row to update.

    A ``sqlite3.Error`` is caught and printed; the update is rolled back.
    """
    sql = '''UPDATE users SET name = ?, email = ?, age = ? WHERE id = ?'''
    try:
        # Commit on success, roll back if the UPDATE raises.
        with conn:
            conn.execute(sql, user)
        print(f"Updated user with id: {user[3]}")
    except Error as e:
        print(f"Error: {e}")
# Delete operations
def delete_user(conn, id):
    """Delete the ``users`` row whose id equals *id* and commit.

    Args:
        conn: An open ``sqlite3.Connection``.
        id: Primary-key value of the row to delete (name kept for caller
            compatibility even though it shadows the builtin).

    A ``sqlite3.Error`` is caught and printed; the delete is rolled back.
    """
    try:
        # Commit on success, roll back if the DELETE raises.
        with conn:
            conn.execute("DELETE FROM users WHERE id=?", (id,))
        print(f"Deleted user with id: {id}")
    except Error as e:
        print(f"Error: {e}")
# Advanced queries
def advanced_queries(conn):
    """Run a set of example SELECT statements against ``users``.

    Args:
        conn: An open ``sqlite3.Connection``.

    Returns:
        A dict mapping a short label (``"filtering"``, ``"ordering"``,
        ``"aggregation"``, ``"grouping"``, ``"like"``, ``"complex"``) to the
        list of rows that query produced, or ``None`` if a ``sqlite3.Error``
        occurred (the error is printed).
    """
    queries = {
        # Filtering
        "filtering": "SELECT * FROM users WHERE age > 25",
        # Ordering
        "ordering": "SELECT * FROM users ORDER BY age DESC",
        # Aggregation
        "aggregation": "SELECT AVG(age), COUNT(*) FROM users",
        # Grouping
        "grouping": (
            " SELECT age, COUNT(*) FROM users "
            "GROUP BY age HAVING COUNT(*) > 1 "
        ),
        # LIKE query
        "like": "SELECT * FROM users WHERE name LIKE 'J%'",
        # Complex conditions
        "complex": (
            " SELECT * FROM users "
            "WHERE age BETWEEN 20 AND 30 AND email LIKE '%.com' "
        ),
    }
    try:
        cur = conn.cursor()
        results = {}
        for label, sql in queries.items():
            cur.execute(sql)
            # Fetch before the next execute() discards this result set.
            results[label] = cur.fetchall()
        return results
    except Error as e:
        print(f"Error: {e}")
        return None
# Example usage
def main():
    """Run the CRUD helpers end to end against ``pythonsqlite.db``."""
    database = "pythonsqlite.db"
    conn = create_connection(database)

    if conn is None:
        print("Error: Cannot create database connection")
        return

    try:
        # Create table
        create_table(conn)

        # Insert single user
        user = ('John Doe', 'john@example.com', 25)
        user_id = insert_user(conn, user)

        # Insert multiple users
        users = [
            ('Jane Doe', 'jane@example.com', 22),
            ('Bob Smith', 'bob@example.com', 28)
        ]
        insert_many_users(conn, users)

        # Read operations
        print("\nAll users:")
        select_all_users(conn)

        # insert_user returns None on failure; skip the steps that need its id.
        if user_id is not None:
            print("\nUser by ID:")
            select_user_by_id(conn, user_id)

            # Update user
            updated_user = ('John Updated', 'john.updated@example.com', 26, user_id)
            update_user(conn, updated_user)

            # Delete user
            delete_user(conn, user_id)
    finally:
        # Close connection even if one of the steps above raises.
        conn.close()


if __name__ == '__main__':
    main()
# Using with pandas
def sql_with_pandas():
    """Copy the ``users`` table into ``users_backup`` via a pandas DataFrame.

    Opens ``pythonsqlite.db`` through ``create_connection`` and does nothing
    if no connection is returned.
    """
    import pandas as pd

    conn = create_connection("pythonsqlite.db")
    if conn is None:
        return

    try:
        # Read SQL query into DataFrame
        df = pd.read_sql_query("SELECT * FROM users", conn)

        # Write DataFrame to SQL
        df.to_sql('users_backup', conn, if_exists='replace', index=False)
    finally:
        # Close the connection even if pandas raises.
        conn.close()
# Using with context manager
def using_context_manager(db_file="pythonsqlite.db"):
    """Print every row of ``users`` using a connection that is closed on exit.

    Args:
        db_file: Path of the database file; defaults to ``pythonsqlite.db``.

    Raises:
        sqlite3.Error: If the query fails (for example, the table is missing).
    """
    from contextlib import closing

    # A sqlite3 connection used directly in `with` only commits or rolls back;
    # it is not closed. closing() guarantees close() on exit.
    with closing(sqlite3.connect(db_file)) as conn:
        cur = conn.cursor()
        cur.execute("SELECT * FROM users")
        rows = cur.fetchall()

    for row in rows:
        print(row)