I can get more specific once I know which dataset you're using, but here's a starter:
import pandas as pd
import matplotlib.pyplot as plt
def pick_column_to_count(df):
    """Return the label of a column that is good for counting values.

    A "good" column holds text or categories (object, string, or categorical
    dtype) and has fewer distinct values than half the number of rows, so its
    values actually repeat. The first such column wins.

    Args:
        df: The DataFrame to inspect.

    Returns:
        The label of the first good column; otherwise the first column of
        ``df``; or ``None`` when ``df`` has no columns at all.
    """
    for col in df.columns:
        values = df[col]
        dtype = values.dtype
        # Text is not always "object": depending on the pandas version and on
        # how the file was written, it can also be a dedicated string dtype
        # or a categorical.
        holds_labels = (
            pd.api.types.is_object_dtype(dtype)
            or pd.api.types.is_string_dtype(dtype)
            or isinstance(dtype, pd.CategoricalDtype)
        )
        if not holds_labels:
            continue
        try:
            distinct = values.nunique()
        except TypeError:
            # Object columns can hold unhashable values (e.g. lists), which
            # cannot be counted — skip them instead of crashing.
            continue
        if distinct < len(df) * 0.5:
            return col
    # Nothing repeats enough: fall back to the first column, if there is one.
    return df.columns[0] if len(df.columns) else None


if __name__ == "__main__":
    # Load the data — change the filename to match your file
    df = pd.read_parquet("your_file.parquet")  # or pd.read_csv("your_file.csv")

    # Take a peek
    print(f"This dataset has {len(df):,} rows and {len(df.columns)} columns.\n")
    print("Columns:", df.columns.tolist())
    print("\nFirst few rows:")
    print(df.head())

    # Pick a column with repeating categories (good for counting)
    column_to_count = pick_column_to_count(df)

    if column_to_count is None:
        print("\nThis dataset has no columns, so there is nothing to count.")
    else:
        counts = df[column_to_count].value_counts().head(10)
        print(f"\nTop 10 values in '{column_to_count}':")
        print(counts)

        if counts.empty:
            # No rows (or only missing values): a bar chart would have nothing
            # to draw.
            print("\nNothing to chart: that column has no values.")
        else:
            # Make a chart
            counts.plot(kind="bar", title=f"Top 10 values in '{column_to_count}'")
            plt.tight_layout()
            plt.show()
Let me know how it goes!