Standard Configs
In [4]:
# My Standard Spark Session!
# Python libraries:
import os
import sys
import re
from dateutil import parser
# import datetime
from datetime import datetime
from datetime import date
import builtins
import json
import functools
import operator
from itertools import product
# Numpy & Pandas!
import numpy as np
import pandas as pd
pd.options.display.float_format = '{:20,.2f}'.format
pd.options.display.max_columns = None
# Spark!
from pyspark import SparkContext
from pyspark.sql.functions import *  # shadows Python builtins like sum/min/max; `builtins` above keeps the originals reachable
from pyspark.sql.types import *
from pyspark.sql.window import *
from pyspark.sql import SparkSession, Row
spark = SparkSession.builder.appName("myapp").getOrCreate()
# spark = SparkSession.builder.master("yarn")\
# .config("spark.executor.instances", "32")\
# .config("spark.executor.cores", "4")\
# .config("spark.executor.memory", "4G")\
# .config("spark.driver.memory", "4G")\
# .config("spark.executor.memoryOverhead","4G")\
# .config("spark.yarn.queue","Medium")\
# .appName("myapp")\
# .getOrCreate()
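# (Illustrative sizing, not a recommendation: 32 executors x 4 cores x 4G
#  works out to roughly 128 cores / 128G for the app, plus 4G overhead per
#  executor; tune instances, memory, and queue to your cluster's YARN limits.)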
sc = spark.sparkContext
# Keep Hive-style partition column values as strings instead of inferring their types
spark.conf.set("spark.sql.sources.partitionColumnTypeInference.enabled", "false")
# Show more fields when wide schemas/plans are stringified; this takes an integer, not a boolean
spark.conf.set("spark.sql.debug.maxToStringFields", "100")