How to take a RANDOM sample from a Pandas DataFrame in Python

January 13, 2019

Contact Seller

Guest Post Description

How to take a RANDOM sample from a Pandas DataFrame in Python

def Kickstarter_Example_99():
    """Print a small demo of randomly sampling rows from a pandas DataFrame.

    Builds a five-row DataFrame, prints it, and then prints three random
    subsets of its rows:

    * 2 rows picked via ``DataFrame.take`` over a shuffled positional index,
    * 4 rows picked the same way,
    * 3 rows picked via ``DataFrame.sample``.

    Warnings are silenced only while the demo runs; the warning filters
    that were active before the call are restored afterwards.

    Returns:
        None. All results are written to stdout.
    """
    print()
    print(format('randomly sample a Pandas DataFrame', '*^82'))
    import warnings

    # Scope the suppression: a bare filterwarnings("ignore") would stay
    # installed for the rest of the process and hide unrelated warnings.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        # load libraries (inside the guarded region so import-time warnings
        # are suppressed as well)
        import pandas as pd
        import numpy as np
        # Create dataframe
        raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
                    'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
                    'age': [42, 52, 36, 24, 73],
                    'preTestScore': [4, 24, 31, 2, 3],
                    'postTestScore': [25, 94, 57, 62, 70]}
        df = pd.DataFrame(raw_data, columns=['first_name', 'last_name', 'age',
                                             'preTestScore', 'postTestScore'])
        print(); print(df)
        # Select a random subset of 2 without replacement: shuffle the row
        # positions, keep the first two, and take those rows.
        print(); print(df.take(np.random.permutation(len(df))[:2]))
        # Select a random subset of 4 without replacement
        print(); print(df.take(np.random.permutation(len(df))[:4]))
        # random sample of df
        df1 = df.sample(3)
        print(); print(df1)
# Run the demo; it prints the DataFrame and the random subsets to stdout.
Kickstarter_Example_99()

How to take a RANDOM sample from a Pandas DataFrame in Python

Add a Review

You must be logged in to post a comment.