diff -urN globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager.h globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager.h --- globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager.h Fri Jul 25 23:48:38 2003 +++ globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager.h Sun Jan 4 00:59:10 2004 @@ -158,11 +158,21 @@ * The state of the job. This corresponds to the job state machine * described in the GRAM documentation. * + * Use globus_gram_job_manager_request_set_status() to change. + * * @todo add link */ globus_gram_protocol_job_state_t status; /** + * Last time status was changed + * + * The time that the status member was last changed. + * Automatically set by globus_gram_job_manager_request_set_status(). + */ + time_t status_update_time; + + /** * Job Failure Reason * * If the state is GLOBUS_GRAM_STATE_FAILED, then this @@ -395,6 +405,17 @@ globus_gram_job_manager_request_destroy( globus_gram_jobmanager_request_t * request); +int +globus_gram_job_manager_request_set_status( + globus_gram_jobmanager_request_t * request, + globus_gram_protocol_job_state_t status); + +int +globus_gram_job_manager_request_set_status_time( + globus_gram_jobmanager_request_t * request, + globus_gram_protocol_job_state_t status, + time_t valid_time); + void globus_gram_job_manager_request_open_logfile( globus_gram_jobmanager_request_t * request, @@ -521,6 +542,13 @@ FILE * fp); int +globus_gram_job_manager_output_get_size( + globus_gram_jobmanager_request_t * request, + const char * type, + globus_off_t * size + ); + +int globus_gram_job_manager_output_check_size( globus_gram_jobmanager_request_t * request, const char * type, diff -urN globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_gsi.c globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_gsi.c --- 
globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_gsi.c Fri Mar 28 23:52:00 2003 +++ globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_gsi.c Sun Jan 4 00:59:10 2004 @@ -189,7 +189,8 @@ { if ((int) lifetime - request->proxy_timeout <= 0) { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, + GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_USER_PROXY_EXPIRED; rc = GLOBUS_FAILURE; diff -urN globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_output.c globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_output.c --- globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_output.c Fri Mar 28 23:52:00 2003 +++ globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_output.c Sun Jan 4 00:59:10 2004 @@ -765,6 +765,44 @@ /* globus_gram_job_manager_output_get_cache_name() */ /** + * Get the recorded size of a job's stdout or stderr output + * + * @param request + * Request that we are checking the size of the output file of. + * @param type + * Must be either "stdout" or "stderr". + * @param size + * Pointer to memory to hold result. 
+ * + * @retval GLOBUS_SUCCESS (or GLOBUS_FAILURE if you pass in bogus arguments, including a NULL or unrecognized type) + */ +int +globus_gram_job_manager_output_get_size( + globus_gram_jobmanager_request_t * request, + const char * type, + globus_off_t * size + ) +{ + if(!size || !request || !request->output || !type) { + return GLOBUS_FAILURE; + } + + if(strcmp(type, "stdout") == 0) + { + *size = request->output->stdout_size; + return GLOBUS_SUCCESS; + } + else if(strcmp(type, "stderr") == 0) + { + *size = request->output->stderr_size; + return GLOBUS_SUCCESS; + } + + return GLOBUS_FAILURE; +} +/* globus_gram_job_manager_output_get_size() */ + +/** * Check size of output files * * Verifies that the size of the stdout or stderr file is exactly @a size diff -urN globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_query.c globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_query.c --- globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_query.c Tue Mar 25 15:31:26 2003 +++ globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_query.c Sun Jan 4 00:59:10 2004 @@ -319,7 +319,7 @@ case GLOBUS_GRAM_JOB_MANAGER_STATE_PROXY_RELOCATE: request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_USER_CANCELLED; return GLOBUS_SUCCESS; @@ -327,7 +327,7 @@ case GLOBUS_GRAM_JOB_MANAGER_STATE_TWO_PHASE: request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_USER_CANCELLED; request->unsent_status_change = GLOBUS_TRUE; if(request->poll_timer != GLOBUS_HANDLE_TABLE_NO_HANDLE) @@ 
-354,7 +354,7 @@ case GLOBUS_GRAM_JOB_MANAGER_STATE_STAGE_IN: request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_USER_CANCELLED; request->unsent_status_change = GLOBUS_TRUE; @@ -710,6 +710,33 @@ "%"GLOBUS_OFF_T_FORMAT" %"GLOBUS_OFF_T_FORMAT, &out_size, &err_size) > 0) { + if( ! request || !(request->output) ) { + globus_gram_job_manager_request_log( + request, + "JM: ***** STDIO_SIZE request. " + "BOGUS REQUEST OBJECT!\n"); + } + else + { + globus_off_t local_size_stdout = 0; + globus_off_t local_size_stderr = 0; + int rc_out, rc_err; + rc_out = globus_gram_job_manager_output_get_size(request, + "stdout", &local_size_stdout); + rc_err = globus_gram_job_manager_output_get_size(request, + "stderr", &local_size_stderr); + globus_gram_job_manager_request_log( + request, + "JM: STDIO_SIZE request. " + "stdout: remote %d, local %d%s, %s." + "stderr: remote %d, local %d%s, %s.\n", + (int)(out_size), (int)local_size_stdout, + (rc_out == GLOBUS_SUCCESS) ? "" : " error querying local value", + (out_size == local_size_stdout)?"ok":"ERROR", + (int)(err_size), (int)local_size_stderr, + (rc_err == GLOBUS_SUCCESS) ? 
"" : " error querying local value", + (err_size == local_size_stderr)?"ok":"ERROR"); + } if(out_size >= 0) { rc = globus_gram_job_manager_output_check_size( @@ -804,7 +831,7 @@ case GLOBUS_GRAM_JOB_MANAGER_STATE_STDIO_UPDATE_CLOSE: case GLOBUS_GRAM_JOB_MANAGER_STATE_STDIO_UPDATE_OPEN: case GLOBUS_GRAM_JOB_MANAGER_STATE_STOP: - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->unsent_status_change = GLOBUS_TRUE; request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_JM_STOPPED; request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_STOP; @@ -812,14 +839,14 @@ case GLOBUS_GRAM_JOB_MANAGER_STATE_PRE_CLOSE_OUTPUT: case GLOBUS_GRAM_JOB_MANAGER_STATE_CLOSE_OUTPUT: case GLOBUS_GRAM_JOB_MANAGER_STATE_STOP_CLOSE_OUTPUT: - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->unsent_status_change = GLOBUS_TRUE; request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_JM_STOPPED; request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_STOP_CLOSE_OUTPUT; break; case GLOBUS_GRAM_JOB_MANAGER_STATE_STAGE_OUT: - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->unsent_status_change = GLOBUS_TRUE; request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_JM_STOPPED; request->jobmanager_state = @@ -830,7 +857,7 @@ case GLOBUS_GRAM_JOB_MANAGER_STATE_STOP_DONE: case GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED_TWO_PHASE: case GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED_TWO_PHASE_COMMITTED: - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->unsent_status_change = GLOBUS_TRUE; request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_JM_STOPPED; request->jobmanager_state = diff -urN 
globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_request.c globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_request.c --- globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_request.c Fri Mar 28 23:52:00 2003 +++ globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_request.c Sun Jan 4 00:59:10 2004 @@ -55,6 +55,7 @@ r->condor_os = NULL; r->condor_arch = NULL; r->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_UNSUBMITTED; + r->status_update_time = 0; r->url_base = GLOBUS_NULL; r->job_contact = GLOBUS_NULL; r->job_contact_path = GLOBUS_NULL; @@ -142,6 +143,67 @@ } /* globus_gram_job_manager_request_destroy() */ +/** + * Change the status associated with a job request + * + * Changes the status and stamps the change with the current time. + * There is now additional tracking data associated with the + * status that must be updated whenever the status changes. This function + * handles managing it. It is NOT recommended that you directly + * change the status. + * + * @param request + * Job request to change status of. + * @param status + * Status to set the job request to. + * + * @return GLOBUS_SUCCESS assuming valid input. + * If the request is null, returns GLOBUS_FAILURE. + */ +int +globus_gram_job_manager_request_set_status( + globus_gram_jobmanager_request_t * request, + globus_gram_protocol_job_state_t status) +{ + return globus_gram_job_manager_request_set_status_time(request, status, + time(0)); +} +/* globus_gram_job_manager_request_set_status() */ + + +/** + * Change the status associated with a job request as of a given time + * + * Sets the status and records valid_time as its last update time. + * There is now additional tracking data associated with the + * status that must be updated whenever the status changes. This function + * handles managing it. It is NOT recommended that you directly + * change the status. + * + * @param request + * Job request to change status of. 
+ * @param status + * Status to set the job request to. + * @param valid_time + * The status is known good as of this time (seconds since epoch) + * + * @return GLOBUS_SUCCESS assuming valid input. + * If the request is null, returns GLOBUS_FAILURE. + */ +int +globus_gram_job_manager_request_set_status_time( + globus_gram_jobmanager_request_t * request, + globus_gram_protocol_job_state_t status, + time_t valid_time) +{ + if( ! request ) + return GLOBUS_FAILURE; + request->status = status; + request->status_update_time = valid_time; + return GLOBUS_SUCCESS; +} +/* globus_gram_job_manager_request_set_status_time() */ + extern void globus_gram_job_manager_request_open_logfile( diff -urN globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_script.c globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_script.c --- globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_script.c Fri Mar 28 23:52:01 2003 +++ globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_script.c Sun Jan 4 00:59:10 2004 @@ -457,6 +457,317 @@ } /* globus_gram_job_manager_script_submit() */ + + +/** + * Set job request status and fire callback so it registers + * + * Only locks the request when the status actually changes. + */ +static +int +local_globus_set_status( + globus_gram_jobmanager_request_t * request, + globus_gram_protocol_job_state_t status) +{ + if( ! request ) + return GLOBUS_FAILURE; + + if(request->status != status) + { + globus_mutex_lock(&request->mutex); + globus_gram_job_manager_request_set_status(request, status); + request->unsent_status_change = GLOBUS_TRUE; + globus_mutex_unlock(&request->mutex); + } + + { + /* Globus expects the callback to fire. Force it to + * run instantly. 
*/ + globus_reltime_t delay; + GlobusTimeReltimeSet(delay, 0, 0); + globus_callback_register_oneshot( + &request->poll_timer, + &delay, + globus_gram_job_manager_state_machine_callback, + request); + } + return GLOBUS_SUCCESS; +} +/* local_globus_set_status() */ + + +/** + * Modifies job_contact in place to remove the port. + */ +static void job_contact_strip_port( + char * job_contact) +{ + char * first_end; + char * second_begin; + + if( job_contact == 0 ) + return; + + first_end = strrchr( job_contact, ':' ); + if( first_end == 0 ) /* malformed job_contact? */ + return; + + second_begin = strchr( first_end, '/' ); + if( second_begin == 0 ) /* malformed job_contact? */ + return; + + memmove(first_end, second_begin, strlen(second_begin) + 1); +} + + + +/** + * Try to poll status of job request using Condor grid_manager_monitor_agent + * + * If the Condor grid_manager_monitor_agent is running on the machine, this + * function retrieves job request status using that, otherwise it fails. + * Expected to be called exclusively from globus_gram_job_manager_script_poll. + * @return GLOBUS_SUCCESS if the poll was satisfied without the Perl scripts, else GLOBUS_FAILURE. + */ +int +globus_gram_job_manager_script_poll_fast( + globus_gram_jobmanager_request_t * request) +{ + char * grid_monitor_output = 0; + /* Path is $GLOBUS_LOCATION/GRID_MONITOR_LOCATION$UID */ + char * GRID_MONITOR_LOCATION = "/tmp/grid_manager_monitor_agent_log."; + const char * WHITESPACE = " \t"; + uid_t this_uid = geteuid(); + struct stat stat_results; + FILE * grid_monitor_file = 0; + int rc; + time_t MAX_MONITOR_FILE_AGE = (60*5); /* seconds */ + char line[1024]; + char line_job_contact[1024]; + int return_val = GLOBUS_FAILURE; + time_t status_file_last_update = 0; + const int DEBUG_FAST_POLL = 0; /* Set to 1 for extra log info */ + char * job_contact_match = 0; + + if( ! request || ! request->globus_location || ! 
request->job_contact) + goto FAST_POLL_EXIT_FAILURE; + + if(this_uid > 999999) + { + /* UIDs this large are unlikely, but if they occur the buffer + * isn't large enough to handle it + */ + goto FAST_POLL_EXIT_FAILURE; + } + + grid_monitor_output = globus_libc_malloc( + strlen(request->globus_location) + + strlen(GRID_MONITOR_LOCATION) + 10); + if( ! grid_monitor_output) + goto FAST_POLL_EXIT_FAILURE; + + sprintf(grid_monitor_output, "%s%s%d", + request->globus_location, + GRID_MONITOR_LOCATION, + (int)this_uid); + + grid_monitor_file = fopen(grid_monitor_output, "r"); + + if( ! grid_monitor_file ) + { + /* No monitor file? That's acceptable, silently fail */ + goto FAST_POLL_EXIT_FAILURE; + } + + /* Check the stream we opened, not the path, so the file cannot be swapped in between */ + rc = fstat(fileno(grid_monitor_file), &stat_results); + + if( rc != 0 ) + goto FAST_POLL_EXIT_FAILURE; + + if(stat_results.st_uid != this_uid || + !S_ISREG(stat_results.st_mode) + /* TODO: test for world writable (bad)? Is such a test logical on AFS? */ + ) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: Monitoring file looks untrustworthy. " + "Reverting to normal polling\n"); + goto FAST_POLL_EXIT_FAILURE; + } + + if( (stat_results.st_mtime + MAX_MONITOR_FILE_AGE) < time(NULL) ) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: Monitoring file looks out of date. " + "Reverting to normal polling\n"); + goto FAST_POLL_EXIT_FAILURE; + } + + /* If we got this far, we've decided we trust the file */ + + /* Read the first line, which is two timestamps as seconds since epoch. + * The first one is start time of last query pass, the second is finish. */ + if( ! fgets(line, sizeof(line), grid_monitor_file) ) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: Job state file %s malformed, missing first line\n", + grid_monitor_output); + goto FAST_POLL_EXIT_FAILURE; + } + if( ! 
feof(grid_monitor_file) && strchr(line, '\n') == NULL) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: Job state file %s malformed, first line too long\n", + grid_monitor_output); + goto FAST_POLL_EXIT_FAILURE; + } + + status_file_last_update = (time_t) strtol(line, NULL, 10); + if(status_file_last_update < request->status_update_time) { + /* We somehow got a status update more recent than the status file. + * Most likely we successfully executed a traditional poll faster than + * the status script processed things. This status file is fresh + * enough, so we should switch over to using that, we want to avoid + * firing off a traditional poll. So, leave the existing status in + * place and report a successful poll. */ + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: **** Job state file %s older than my last " + "known status. Using last known status. (%ld < %ld)\n", + grid_monitor_output, + (long) status_file_last_update, (long) request->status_update_time); + local_globus_set_status(request, request->status); + return_val = GLOBUS_SUCCESS; + goto FAST_POLL_EXIT; + } + + job_contact_match = globus_libc_malloc(strlen(request->job_contact) + 1); + if( ! job_contact_match ) + goto FAST_POLL_EXIT_FAILURE; + strcpy(job_contact_match, request->job_contact); + job_contact_strip_port(job_contact_match); + + if(DEBUG_FAST_POLL) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: ******* seeking: %s in %s\n", job_contact_match, + grid_monitor_output); + } + /* TODO: First pass. Improve with binary search of file to make + * scanning large files fast. Still this is probably plenty fast + * enough for fairly large runs. */ + while( 1 ) + { + size_t len = 0; + char * line_bit = line; + if( ! fgets(line, sizeof(line), grid_monitor_file) ) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: ******** " + "Failed to find %s\n", job_contact_match); + /* end of file (or error), job isn't in file. It might just not + * have been noticed yet. Silently skip */ + goto FAST_POLL_EXIT_FAILURE; + } + if( ! 
feof(grid_monitor_file) && strchr(line, '\n') == NULL) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: Monitoring file looks corrupt, " + "lines are too long. " + "Reverting to normal polling\n"); + goto FAST_POLL_EXIT_FAILURE; + } + + len = strcspn(line_bit, WHITESPACE); + if(len == 0) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: Monitoring file looks corrupt, " + "line doesn't start with job contact string. " + "Reverting to normal polling\n"); + goto FAST_POLL_EXIT_FAILURE; + } + + /* So long as sizeof(line_job_contact) == sizeof(line), + * this is safe */ + memcpy(line_job_contact, line, len); + line_job_contact[len] = 0; + job_contact_strip_port(line_job_contact); + + if( strcmp(line_job_contact, job_contact_match) != 0 ) + { + if(DEBUG_FAST_POLL) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: no match: %s\n", + line); + } + continue; + } + + line_bit += len; + + len = strspn(line_bit, WHITESPACE); + if(len == 0) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: Monitoring file looks corrupt, " + "missing whitespace field seperator. " + "Reverting to normal polling\n"); + goto FAST_POLL_EXIT_FAILURE; + } + + line_bit += len; + + /* Found exact match, read status */ + len = strspn(line_bit, "0123456789"); + if(len == 0) + { + /* No digits!? */ + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: Monitoring file looks corrupt, " + "non numeric status. " + "Reverting to normal polling\n"); + goto FAST_POLL_EXIT_FAILURE; + } + + local_globus_set_status(request, atoi(line_bit)); + + if(DEBUG_FAST_POLL) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: ******** " + "OK. 
found %s (%s)\n", job_contact_match, line_job_contact); + } + + return_val = GLOBUS_SUCCESS; + goto FAST_POLL_EXIT; + } + +FAST_POLL_EXIT_FAILURE: + return_val = GLOBUS_FAILURE; + +FAST_POLL_EXIT: + if(grid_monitor_file) + fclose(grid_monitor_file); + if( grid_monitor_output ) + globus_libc_free(grid_monitor_output); + if( job_contact_match ) + globus_libc_free(job_contact_match); + + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: returning %d = %s\n", return_val, + (return_val == GLOBUS_FAILURE) ? + "GLOBUS_FAILURE (try Perl scripts)" : + "GLOBUS_SUCCESS (skip Perl scripts)"); + return return_val; +} +/* globus_gram_job_manager_script_poll_fast() */ + + + + /** * Poll the status of a job request. * @@ -482,6 +793,12 @@ char * stderr_filename = "/dev/null"; char * script_arg_file; + /* Keep the state file's timestamp up to date so that + * anything scrubbing the state files of old and dead + * processes leaves it alone */ + if(request && request->job_state_file) + utime(request->job_state_file, NULL); + script_arg_file = tempnam(NULL, "gram_poll"); if (!request) @@ -509,12 +826,19 @@ globus_gram_job_manager_request_log(request, "JMI: local stderr filename = %s.\n", stderr_filename); + globus_gram_job_manager_request_log(request, + "JMI: poll: seeking: %s\n", request->job_contact); + if( globus_gram_job_manager_script_poll_fast(request) == GLOBUS_SUCCESS ) + { + return(GLOBUS_SUCCESS); + } + if ((script_arg_fp = fopen(script_arg_file, "w")) == NULL) { globus_gram_job_manager_request_log(request, "JMI: Failed to open gram script argument file. %s\n", script_arg_file ); - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_ARG_FILE_CREATION_FAILED; return(GLOBUS_FAILURE); @@ -1027,7 +1351,7 @@ globus_gram_job_manager_request_log(request, "JMI: Failed to open gram script argument file. 
%s\n", script_arg_file ); - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_ARG_FILE_CREATION_FAILED; return(GLOBUS_FAILURE); @@ -1275,7 +1599,7 @@ else if(globus_l_gram_job_manager_script_valid_state_change( request, script_status)) { - request->status = script_status; + globus_gram_job_manager_request_set_status(request, script_status); request->unsent_status_change = GLOBUS_TRUE; } } @@ -1285,7 +1609,7 @@ if(request->jobmanager_state == starting_jobmanager_state) { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); if(script_status <= 0) { request->failure_code = @@ -1340,7 +1664,7 @@ } else if(request->jobmanager_state == starting_jobmanager_state) { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_INVALID_SCRIPT_STATUS; request->unsent_status_change = GLOBUS_TRUE; @@ -1407,7 +1731,7 @@ request, script_status))) { request->unsent_status_change = GLOBUS_TRUE; - request->status = script_status; + globus_gram_job_manager_request_set_status(request, script_status); if(request->status == GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED) { request->failure_code = @@ -1423,7 +1747,7 @@ script_status)) { - request->status = script_status; + globus_gram_job_manager_request_set_status(request, script_status); request->unsent_status_change = GLOBUS_TRUE; } } diff -urN globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_state.c globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_state.c --- globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_state.c Fri Jul 25 23:48:39 2003 +++ 
globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_state.c Sun Jan 4 01:50:46 2004 @@ -151,7 +151,7 @@ fprintf(stderr, "ERROR: unable to get HOME from the environment\n"); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_GATEKEEPER_MISCONFIGURED; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED_DONE; break; @@ -182,7 +182,7 @@ request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_GATEKEEPER_MISCONFIGURED; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED_DONE; break; @@ -205,7 +205,7 @@ request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_GATEKEEPER_MISCONFIGURED; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED_DONE; break; @@ -217,7 +217,7 @@ request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_GATEKEEPER_MISCONFIGURED; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED_DONE; break; @@ -232,7 +232,7 @@ request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_CONDOR_ARCH; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED_DONE; break; @@ -244,7 +244,7 @@ "jobmanager type is condor\n"); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_CONDOR_ARCH; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + 
globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED_DONE; } @@ -265,7 +265,7 @@ if(rc != GLOBUS_SUCCESS) { request->failure_code = rc; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED_DONE; break; @@ -578,7 +578,7 @@ rc = globus_rsl_eval(request->rsl, &request->symbol_table); if(rc != GLOBUS_SUCCESS) { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_RSL_EVALUATION_FAILED; request->jobmanager_state = @@ -594,7 +594,7 @@ GLOBUS_GRAM_VALIDATE_JOB_MANAGER_RESTART); if(rc != GLOBUS_SUCCESS) { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = rc; request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED; @@ -611,7 +611,7 @@ rc = globus_rsl_eval(request->rsl, &request->symbol_table); if(rc != GLOBUS_SUCCESS) { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_RSL_EVALUATION_FAILED; request->jobmanager_state = @@ -642,7 +642,7 @@ rc = globus_gram_job_manager_state_file_read(request); if(rc != GLOBUS_SUCCESS) { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = rc; request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED; @@ -652,7 +652,7 @@ rc = globus_rsl_assist_attributes_canonicalize(request->rsl); if(rc != GLOBUS_SUCCESS) 
{ - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_BAD_RSL; request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED; @@ -690,7 +690,7 @@ rc = globus_rsl_assist_attributes_canonicalize(request->rsl); if(rc != GLOBUS_SUCCESS) { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_BAD_RSL; request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED; @@ -870,7 +870,7 @@ "Failed to create scratch dir\n"); request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); if(request->failure_code == GLOBUS_SUCCESS) { request->failure_code = @@ -1065,7 +1065,7 @@ { request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = rc; } break; @@ -1186,7 +1186,7 @@ { request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_WRITING_STATE_FILE; globus_gram_job_manager_request_log( request, @@ -1285,7 +1285,7 @@ if(globus_gram_job_manager_rsl_need_stage_in(request)) { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_STAGE_IN; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_STAGE_IN); if(!request->dry_run) { @@ -1344,7 +1344,7 @@ if(rc != GLOBUS_SUCCESS) { 
request->failure_code = rc; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); if(!request->dry_run) { @@ -1378,7 +1378,7 @@ request->failure_code = GLOBUS_GRAM_PROTOCOL_ERROR_SUBMIT_UNKNOWN; } - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->unsent_status_change = GLOBUS_TRUE; } globus_gram_job_manager_reporting_file_create(request); @@ -1424,7 +1424,7 @@ /* Job finished! start finalizing */ if(globus_gram_job_manager_rsl_need_stage_out(request)) { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_STAGE_OUT; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_STAGE_OUT); globus_gram_job_manager_contact_state_callback(request); } request->jobmanager_state = @@ -1763,7 +1763,7 @@ } else { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = rc; if(request->jobmanager_state == @@ -1858,7 +1858,7 @@ else { request->failure_code = rc; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); if(request->jobmanager_state != GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED_FILE_CLEAN_UP) @@ -1910,7 +1910,7 @@ } else { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->failure_code = rc; if(request->jobmanager_state != @@ -1952,7 +1952,7 @@ else if(rc != GLOBUS_SUCCESS && request->failure_code == 0) { request->failure_code = rc; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); if(request->jobmanager_state == 
GLOBUS_GRAM_JOB_MANAGER_STATE_CACHE_CLEAN_UP) @@ -2033,7 +2033,7 @@ * This callback is delayed until after the close output is completed, * so that clients won't exit before the output is sent. */ - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); globus_gram_job_manager_contact_state_callback(request); @@ -2120,21 +2120,21 @@ { request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_TWO_PHASE_END; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_DONE; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_DONE); } else if(request->jobmanager_state == GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED_CLOSE_OUTPUT) { request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED_TWO_PHASE; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); } else if(request->jobmanager_state == GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED_CLOSE_OUTPUT) { request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED_PRE_FILE_CLEAN_UP; - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); break; } @@ -2280,7 +2280,7 @@ if(major_status != GSS_S_COMPLETE) { - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED; rc = GLOBUS_GRAM_PROTOCOL_ERROR_PROTOCOL_FAILED; } @@ -2292,7 +2292,7 @@ "JM: couldn't send job contact to client: rc=%d (%s)\n", rc, globus_gram_protocol_error_string(rc)); - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED; } 
gss_delete_sec_context(&minor_status, @@ -2492,7 +2492,7 @@ case GLOBUS_GRAM_JOB_MANAGER_STATE_CLOSE_OUTPUT: case GLOBUS_GRAM_JOB_MANAGER_STATE_PRE_CLOSE_OUTPUT: case GLOBUS_GRAM_JOB_MANAGER_STATE_STAGE_OUT: - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_DONE; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_DONE); request->unsent_status_change = GLOBUS_TRUE; request->first_poll = GLOBUS_TRUE; request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_POLL1; @@ -2500,7 +2500,7 @@ break; case GLOBUS_GRAM_JOB_MANAGER_STATE_EARLY_FAILED: case GLOBUS_GRAM_JOB_MANAGER_STATE_FAILED: - request->status = GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED; + globus_gram_job_manager_request_set_status(request, GLOBUS_GRAM_PROTOCOL_JOB_STATE_FAILED); request->unsent_status_change = GLOBUS_TRUE; request->jobmanager_state = GLOBUS_GRAM_JOB_MANAGER_STATE_POLL1; request->first_poll = GLOBUS_TRUE; diff -urN globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_state_file.c globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_state_file.c --- globus_2_4_3_adv2003_fix892_fix956/gram/jobmanager/source/globus_gram_job_manager_state_file.c Mon Mar 10 17:34:24 2003 +++ globus_2_4_3_adv2003_fix892_fix956_more/gram/jobmanager/source/globus_gram_job_manager_state_file.c Sun Jan 4 00:59:10 2004 @@ -269,7 +269,8 @@ { goto error_exit; } - request->status = atoi( buffer ); + globus_gram_job_manager_request_set_status_time(request, + atoi( buffer ), statbuf.st_mtime); if (fgets( buffer, sizeof(buffer), fp ) == NULL) { goto error_exit;