package cn.analysys.stream.state

import java.nio.ByteBuffer
import cn.analysys.meta.MetaMapInfo
import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.{HasOffsetRanges, KafkaUtils, ...
Conclusion:
testHllcError(10, tt): error first appears at cardinality 31
testHllcError(11, tt): error first appears at cardinality 33
testHllcError(13, tt): error first appears at cardinality 42
testHllcError(14, tt): error first appears at cardinality 100

package hllctest

import org.scalatest.FlatSpec
import org.spark.sqludf.HLLCounter
import scala.collection.mutable
import scala.util ...
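The truncated test above measures, for a given precision p, the first cardinality at which the HLLC estimate diverges from the exact distinct count. A minimal sketch of that loop; the HLLCounter API used here (constructor taking p, add, getCountEstimate) is an assumption modeled on Kylin-style HLL counters, not necessarily the exact org.spark.sqludf signatures:

def testHllcError(p: Int, values: Seq[String]): Unit = {
  val hllc = new HLLCounter(p)                       // assumed constructor: precision p
  val exact = scala.collection.mutable.HashSet[String]()
  var reported = false
  for (v <- values if !reported) {
    hllc.add(v)                                      // assumed API
    exact.add(v)                                     // exact distinct count for comparison
    if (hllc.getCountEstimate != exact.size) {       // assumed API
      println(s"p=$p: first error at cardinality ${exact.size}")
      reported = true
    }
  }
}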
package hllctest

import java.util
import org.scalatest.FlatSpec
import org.spark.sqludf.HLLCounter
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.util.Random

class HllcCrossSetTest extends FlatSpec {
  val ramdom = new Random()
  val m = 18
  // used to verify h ...
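HllcCrossSetTest appears to check that merging two counters approximates the cardinality of the union of their inputs. A sketch of that idea as a fragment inside the class above, under the same assumed API (merge in particular is an assumption):

val a = new HLLCounter(m)
val b = new HLLCounter(m)
val exact = mutable.HashSet[Int]()
(1 to 100000).foreach { _ =>
  val x = ramdom.nextInt(1000000); a.add(x.toString); exact.add(x)
  val y = ramdom.nextInt(1000000); b.add(y.toString); exact.add(y)
}
a.merge(b)   // assumed API: fold b's registers into a
println(s"estimate=${a.getCountEstimate}, exact=${exact.size}")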
spark-hive-thriftserver local debugging

1. Add the Maven dependencies first, 2. then add the jars under spark/jars and start the HiveThriftServer class: you hit a "cannot find class xml.element" error.
1. Add the jars under spark/jars first, 2. then add the Maven dependencies: it works. Most likely a jar conflict, so the classpath order matters.

spark SQL conf

org.apache.spark.sql.internal.SQLConf
org.apache.spark.sql.hive.HiveUtils
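These two classes hold the default Spark SQL and Hive settings. A quick way to see the effective values at runtime; a minimal sketch, assuming spark is an existing SparkSession:

// List every Spark SQL conf with its current value and doc string.
spark.sql("SET -v").show(1000, false)

// Read a single setting.
spark.conf.get("spark.sql.shuffle.partitions")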
jstack -l pid                                                    # thread dump
/opt/soft/jdk/bin/jstat -gcutil 124485 1000 1000                 # GC stats every 1s, 1000 samples
/opt/soft/jdk/bin/jmap -dump:format=b,file=124485.dump 124485    # heap dump
/opt/soft/jdk/bin/jmap -histo 124485                             # object histogram
sudo yum install haproxy
rpm -ql haproxy
cat /usr/share/doc/haproxy-1.5.18/haproxy.cfg
sudo mkdir -p /usr/local/haproxy/logs
haproxy -f /home ...
var udfNotInited = true

override def newExecuteStatementOperation(
    parentSession: HiveSession,
    statement: String,
    confOverlay: JMap[String, String],
    async: Boolean): ExecuteStatementOperation = synchronized {
  val sqlContext = sessionToContexts.get(parentSession.getSe ...
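The udfNotInited flag suggests custom UDFs are registered lazily, on the first statement that reaches the operation manager. A minimal sketch of that pattern; registerUdfs is a hypothetical helper, not the actual code:

// Inside the synchronized block of newExecuteStatementOperation:
// register the UDFs exactly once, against the first session's SQLContext.
if (udfNotInited) {
  registerUdfs(sqlContext)   // hypothetical helper calling sqlContext.udf.register(...)
  udfNotInited = false
}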

hive spark conf

CREATE TABLE org_userbehavior_all_yunzhi (
  user_id Int,
  event_time bigint,
  behivior_id SMALLINT,
  behivior_name String,
  behivior_pop String,
  record_date String
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
location '/user/hive/warehouse/org_userbehavior_all_yunzhi';

LOAD DATA I ...
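Once the table is loaded it can be queried from the thrift server or straight from a SparkSession; a minimal sketch, with the aggregation itself purely illustrative:

val daily = spark.sql(
  """SELECT record_date, behivior_id, COUNT(DISTINCT user_id) AS uv
    |FROM org_userbehavior_all_yunzhi
    |GROUP BY record_date, behivior_id""".stripMargin)
daily.show(false)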

yarn spark

--master yarn --deploy-mode client --num-executors 4  --executor-memory 6g --executor-cores 3 --conf spark.default.parallelism=40  --conf  spark.sql.shuffle.partitions=20
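The same parallelism settings can also be pinned when building the session in code; a minimal sketch, with the app name made up:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("ExampleJob")                              // hypothetical name
  .config("spark.default.parallelism", "40")
  .config("spark.sql.shuffle.partitions", "20")
  .getOrCreate()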

get day

// Truncate the current Unix time (seconds) to 00:00 of the current day in UTC+8:
// 28800s is the 8-hour offset, 86400s is one day.
def timeDayNow() = {
  val nowMis = timeNow()
  nowMis - ((nowMis + 28800L) % 86400)
}
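A self-contained version for quick checking; timeNow is not shown in the note, so it is stubbed here with System.currentTimeMillis on the assumption that it returns Unix seconds:

object DayStart {
  def timeNow(): Long = System.currentTimeMillis() / 1000   // Unix seconds (assumption)

  def timeDayNow(): Long = {
    val now = timeNow()
    now - ((now + 28800L) % 86400)   // midnight of today in UTC+8
  }

  def main(args: Array[String]): Unit = {
    // Always a multiple of 86400 minus 28800, i.e. 16:00 UTC = 00:00 UTC+8.
    println(timeDayNow())
  }
}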

test code 09-18-2

object FunnelUtil {
  var gson: Gson = new GsonBuilder().create
  val maxFunnelLength = 10
  var beginTimestamp = 0
  var eventLength = 0
  var eventMap: mutable.HashMap[Short, Byte] = null

  def initEventMap(eventDesc: String) = {
    val initEventMap = new mutable.HashMap[Short, Byte]()
    var inde ...
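initEventMap is cut off above; judging from the types, it maps each event id (Short) to its step position (Byte) in the funnel. A guess at the shape, not the original code, and the comma-separated eventDesc format is an assumption (relies on the object's existing scala.collection.mutable import):

def initEventMap(eventDesc: String): mutable.HashMap[Short, Byte] = {
  val map = new mutable.HashMap[Short, Byte]()
  var index: Byte = 0
  // e.g. eventDesc = "10001,10002,10003": step order is order of appearance
  eventDesc.split(",").foreach { id =>
    map.put(id.trim.toShort, index)
    index = (index + 1).toByte
  }
  map
}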

discussion code 09-18

object DataProcess extends App {
  val spark = SparkSession
    .builder()
    .appName("UserBehiviorToHHDataPartition")
    .getOrCreate()

  val dataCollection = spark.sparkContext.textFile("./testdata/ods")
  import spark.implicits._
  val mergePartitonStroragePath = "./t ...
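The snippet stops at the storage path. A typical continuation for this kind of repartition-and-store job, purely illustrative; the column layout is borrowed from the Hive table above and is an assumption:

dataCollection
  .map(_.split("\t"))
  .map(f => (f(0).toInt, f(1).toLong, f(5)))           // user_id, event_time, record_date (assumed layout)
  .toDF("user_id", "event_time", "record_date")
  .write
  .partitionBy("record_date")                          // one output directory per day
  .parquet(mergePartitonStroragePath)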

mvn + scala support

<build>
  <plugins>
    <plugin>
      <artifactId>maven-compiler-plugin</artifactId>
      <configuration>
        <source>1.8</source>
        <target>1.8</target>
        ...

thriftserver

export SPARK_CONF_DIR=/home/yunzhi.lyz/spark/spark220/conf

spark-default.conf:
spark.yarn.jars hdfs:/app/jars/*.jar    # jars on HDFS; on each submit, if a jar under spark/jars is identical to the one on HDFS, the HDFS copy is used, and jars on the driver take priority
spark.app.confpath /udfconf/udf.config  # config file on HDFS

start-thriftserver.sh must be started in client mode, and clie ...
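spark.app.confpath points at a UDF config file on HDFS. A minimal sketch of how such a custom conf key could be read and the file fetched; the key handling here is an assumption based on the note, not the project's actual code:

import org.apache.hadoop.fs.{FileSystem, Path}
import scala.io.Source

val confPath = spark.conf.get("spark.app.confpath", "/udfconf/udf.config")
val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)
val in = fs.open(new Path(confPath))
val udfConfig = Source.fromInputStream(in).getLines().toList   // one UDF entry per line (assumption)
in.close()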