import pickle
from pathlib import Path

import pandas as pd
3
# --- Configuration -----------------------------------------------------------
in_path = ""  # Path of the large delimited text file to split.
out_path = ""  # Directory the pickle files are written to ("" = current dir).
chunk_size = 400000  # Rows per chunk; tune this to the memory you have available.
separator = "~"  # Field delimiter used in the input file.


def split_csv_to_pickles(in_path, out_path, chunk_size, separator):
    """Split a large delimited file into numbered pickle files.

    The input is read ``chunk_size`` rows at a time, and each chunk (a pandas
    DataFrame) is pickled to ``<out_path>/data_<n>.pkl`` where ``n`` starts
    at 1.

    Args:
        in_path: Path of the delimited text file to read.
        out_path: Directory to write the pickle files to. It is created
            (including parents) if it does not exist; an empty string means
            the current working directory.
        chunk_size: Number of rows per chunk / per pickle file.
        separator: Field delimiter passed to ``pandas.read_csv``.

    Returns:
        A list of ``pathlib.Path`` objects, one per pickle file written, in
        the order they were written.
    """
    out_dir = Path(out_path)
    # Create the target directory up front so the first write cannot fail
    # merely because the directory is missing.
    out_dir.mkdir(parents=True, exist_ok=True)

    # With ``chunksize`` set, read_csv returns an iterator of DataFrames
    # rather than loading the whole file. ``low_memory=False`` is carried
    # over from the original script (see the pandas docs on mixed-type
    # inference).
    reader = pd.read_csv(
        in_path,
        sep=separator,
        chunksize=chunk_size,
        low_memory=False,
    )

    written = []
    for index, chunk in enumerate(reader, start=1):
        out_file = out_dir / "data_{}.pkl".format(index)
        with open(out_file, "wb") as f:
            pickle.dump(chunk, f, pickle.HIGHEST_PROTOCOL)
        written.append(out_file)
    return written


if __name__ == "__main__":
    split_csv_to_pickles(in_path, out_path, chunk_size, separator)
17