Spark Task Launching: Source Code Analysis
Task launching starts in CoarseGrainedSchedulerBackend. Its receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] method handles executor registration: each executor reports its executorId, hostPort and cores, and the backend records them. From these registered executors a list of WorkerOffers is built, and tasks are then launched against those offers.
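A condensed sketch of what the RegisterExecutor branch inside receiveAndReply() boils down to (bookkeeping, duplicate-ID handling and listener events are omitted here, and the exact message fields differ slightly between Spark versions); the important point is that every successful registration ends by calling makeOffers():

    case RegisterExecutor(executorId, executorRef, hostPort, cores, logUrls) =>
      // Remember the executor's endpoint, host and core count so it can be offered work
      val (host, _) = Utils.parseHostPort(hostPort)
      val data = new ExecutorData(executorRef, executorRef.address, host, cores, cores, logUrls)
      executorDataMap.put(executorId, data)
      context.reply(RegisteredExecutor)
      // A newly registered executor brings new free cores, so try to schedule on it right away
      makeOffers()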
The makeOffers() method itself:

    // Make fake resource offers on all executors
    private def makeOffers() {
      // Filter out executors under killing
      val activeExecutors = executorDataMap.filterKeys(!executorsPendingToRemove.contains(_))
      val workOffers = activeExecutors.map { case (id, executorData) =>
        new WorkerOffer(id, executorData.executorHost, executorData.freeCores)
      }.toSeq
      launchTasks(scheduler.resourceOffers(workOffers))
    }

CoarseGrainedSchedulerBackend's makeOffers() wraps every live executor's free cores in a WorkerOffer and calls TaskSchedulerImpl's resourceOffers():

    /**
     * Called by cluster manager to offer resources on slaves. We respond by asking our active task
     * sets for tasks in order of priority. We fill each node with tasks in a round-robin manner so
     * that tasks are balanced across the cluster.
     */
    def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = synchronized {
      // Mark each slave as alive and remember its hostname
      // Also track if new executor is added
      var newExecAvail = false
      for (o <- offers) {
        executorIdToHost(o.executorId) = o.host
        activeExecutorIds += o.executorId
        if (!executorsByHost.contains(o.host)) {
          executorsByHost(o.host) = new HashSet[String]()
          executorAdded(o.executorId, o.host)
          newExecAvail = true
        }
        for (rack <- getRackForHost(o.host)) {
          hostsByRack.getOrElseUpdate(rack, new HashSet[String]()) += o.host
        }
      }

      // Randomly shuffle offers to avoid always placing tasks on the same set of workers.
      val shuffledOffers = Random.shuffle(offers)
      // Build a list of tasks to assign to each worker.
      val tasks = shuffledOffers.map(o => new ArrayBuffer[TaskDescription](o.cores))
      val availableCpus = shuffledOffers.map(o => o.cores).toArray
      val sortedTaskSets = rootPool.getSortedTaskSetQueue
      for (taskSet <- sortedTaskSets) {
        logDebug("parentName: %s, name: %s, runningTasks: %s".format(
          taskSet.parent.name, taskSet.name, taskSet.runningTasks))
        if (newExecAvail) {
          taskSet.executorAdded()
        }
      }

      // Take each TaskSet in our scheduling order, and then offer it each node in increasing order
      // of locality levels so that it gets a chance to launch local tasks on all of them.
      // NOTE: the preferredLocality order: PROCESS_LOCAL, NODE_LOCAL, NO_PREF, RACK_LOCAL, ANY
      var launchedTask = false
      for (taskSet <- sortedTaskSets; maxLocality <- taskSet.myLocalityLevels) {
        do {
          launchedTask = resourceOfferSingleTaskSet(
            taskSet, maxLocality, shuffledOffers, availableCpus, tasks)
        } while (launchedTask)
      }

      if (tasks.size > 0) {
        hasLaunchedTask = true
      }
      return tasks
    }

resourceOffers() first records every offered executor and host, randomly shuffles all the WorkerOffers so that tasks are not always placed on the same workers, and sorts the TaskSets by scheduling priority. It then keeps calling its own resourceOfferSingleTaskSet() method for each TaskSet and locality level until a full pass launches no more tasks.
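Before looking at the real resourceOfferSingleTaskSet(), a small self-contained toy model of that loop may help (this is not Spark code; Offer, offerSinglePass and the task indices are invented for illustration). Each pass over the shuffled offers places at most one task per executor that still has CPUS_PER_TASK free cores, and the surrounding do-while stops as soon as a whole pass launches nothing:

    import scala.collection.mutable.ArrayBuffer
    import scala.util.Random

    // Toy model of the offer loop (NOT Spark code): Offer stands in for WorkerOffer,
    // and tasks are just Int indices of one imaginary TaskSet.
    case class Offer(executorId: String, host: String, cores: Int)

    object RoundRobinDemo {
      val CPUS_PER_TASK = 1 // mirrors spark.task.cpus, which defaults to 1

      // One pass over the shuffled offers: give at most one task to every executor
      // that still has enough free CPUs; report whether anything was launched.
      def offerSinglePass(pending: ArrayBuffer[Int],
                          offers: Seq[Offer],
                          availableCpus: Array[Int],
                          assigned: Array[ArrayBuffer[Int]]): Boolean = {
        var launchedTask = false
        for (i <- offers.indices) {
          if (pending.nonEmpty && availableCpus(i) >= CPUS_PER_TASK) {
            assigned(i) += pending.remove(0)   // "launch" the next pending task here
            availableCpus(i) -= CPUS_PER_TASK
            launchedTask = true
          }
        }
        launchedTask
      }

      def main(args: Array[String]): Unit = {
        // Shuffle the offers so the same executor is not always filled first
        val offers = Random.shuffle(Seq(Offer("exec-1", "host-a", 2), Offer("exec-2", "host-b", 3)))
        val availableCpus = offers.map(_.cores).toArray
        val assigned = Array.fill(offers.size)(ArrayBuffer.empty[Int])
        val pending = ArrayBuffer(0, 1, 2, 3, 4, 5, 6)

        // Keep offering until a full pass launches nothing (no free CPUs or no tasks left)
        var launched = false
        do {
          launched = offerSinglePass(pending, offers, availableCpus, assigned)
        } while (launched)

        offers.zip(assigned).foreach { case (o, ts) => println(s"${o.executorId} -> $ts") }
        println(s"still pending: $pending")
      }
    }

With 2 + 3 cores on offer, five of the seven toy tasks get placed and the remaining two stay pending, mirroring how real tasks wait for the next round of offers.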
The real method:

    private def resourceOfferSingleTaskSet(
        taskSet: TaskSetManager,
        maxLocality: TaskLocality,
        shuffledOffers: Seq[WorkerOffer],
        availableCpus: Array[Int],
        tasks: Seq[ArrayBuffer[TaskDescription]]) : Boolean = {
      var launchedTask = false
      for (i <- 0 until shuffledOffers.size) {
        val execId = shuffledOffers(i).executorId
        val host = shuffledOffers(i).host
        if (availableCpus(i) >= CPUS_PER_TASK) {
          try {
            for (task <- taskSet.resourceOffer(execId, host, maxLocality)) {
              tasks(i) += task
              val tid = task.taskId
              taskIdToTaskSetManager(tid) = taskSet
              taskIdToExecutorId(tid) = execId
              executorsByHost(host) += execId
              availableCpus(i) -= CPUS_PER_TASK
              assert(availableCpus(i) >= 0)
              launchedTask = true
            }
          } catch {
            case e: TaskNotSerializableException =>
              logError(s"Resource offer failed, task set ${taskSet.name} was not serializable")
              // Do not offer resources for this task, but don't throw an error to allow other
              // task sets to be submitted.
              return launchedTask
          }
        }
      }
      return launchedTask
    }

This is where executors get assigned. For every WorkerOffer(executorId, host, cores), if the executor still has at least CPUS_PER_TASK available CPUs, TaskSetManager's resourceOffer() is called and may return a task for it. The number of CPUs a single task needs is read from the configuration and defaults to 1. For every task handed out, availableCpus(i) is reduced by CPUS_PER_TASK and asserted to remain >= 0; executors without enough free CPUs are simply skipped. After one pass over all the offers the method returns true if it launched at least one task and false otherwise, which is what eventually ends the caller's do-while loop.
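The "CPUs per task" setting mentioned above is spark.task.cpus; TaskSchedulerImpl reads it roughly like this:

    // Number of CPUs to allocate per task (spark.task.cpus), default 1
    val CPUS_PER_TASK = conf.getInt("spark.task.cpus", 1)

so submitting an application with --conf spark.task.cpus=2 makes every task reserve two cores out of an executor's freeCores instead of one.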
Inside resourceOfferSingleTaskSet(), the actual choice of which task to run is made by TaskSetManager's resourceOffer() method, which picks a task that suits the offered executor and the current locality level.

Back in CoarseGrainedSchedulerBackend, launchTasks() receives the Seq[Seq[TaskDescription]] that resourceOffers() returned. For each task it serializes the TaskDescription, decrements the chosen executor's executorData.freeCores by CPUS_PER_TASK, and sends a LaunchTask message to that executor's endpoint:

    // Launch tasks returned by a set of resource offers
    private def launchTasks(tasks: Seq[Seq[TaskDescription]]) {
      for (task <- tasks.flatten) {
        val serializedTask = ser.serialize(task)
        if (serializedTask.limit >= akkaFrameSize - AkkaUtils.reservedSizeBytes) {
          // Serialized task exceeds the RPC frame size: abort the task set with a message
          // suggesting a larger spark.akka.frameSize or broadcast variables (branch shortened)
        }
        else {
          val executorData = executorDataMap(task.executorId)
          executorData.freeCores -= scheduler.CPUS_PER_TASK
          executorData.executorEndpoint.send(LaunchTask(new SerializableBuffer(serializedTask)))
        }
      }
    }
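On the executor side, the LaunchTask message is picked up by CoarseGrainedExecutorBackend, which deserializes the TaskDescription and hands it to the Executor's thread pool. A rough, abridged sketch of that handler (logging and null checks trimmed; exact signatures vary by Spark version):

    case LaunchTask(data) =>
      // Deserialize the TaskDescription that the driver serialized in launchTasks()
      val taskDesc = ser.deserialize[TaskDescription](data.value)
      executor.launchTask(this, taskDesc.taskId, taskDesc.attemptNumber,
        taskDesc.name, taskDesc.serializedTask)

From this point on the task runs inside the executor JVM.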